From f40ae12519f52b32a78dd60e87fe69e4f3fa12ce Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 18 Nov 2022 20:00:55 -0500 Subject: [PATCH 01/21] Move compiler to its own file --- lib/syntax_tree.rb | 4 +- lib/syntax_tree/compiler.rb | 2737 +++++++++++++++++++++++++++ lib/syntax_tree/visitor/compiler.rb | 2719 -------------------------- test/compiler_test.rb | 21 +- 4 files changed, 2743 insertions(+), 2738 deletions(-) create mode 100644 lib/syntax_tree/compiler.rb delete mode 100644 lib/syntax_tree/visitor/compiler.rb diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index aea21d8e..c62132e6 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require "etc" +require "fiddle" require "json" require "pp" require "prettier_print" @@ -13,7 +14,6 @@ require_relative "syntax_tree/basic_visitor" require_relative "syntax_tree/visitor" -require_relative "syntax_tree/visitor/compiler" require_relative "syntax_tree/visitor/field_visitor" require_relative "syntax_tree/visitor/json_visitor" require_relative "syntax_tree/visitor/match_visitor" @@ -26,6 +26,8 @@ require_relative "syntax_tree/pattern" require_relative "syntax_tree/search" +require_relative "syntax_tree/compiler" + # Syntax Tree is a suite of tools built on top of the internal CRuby parser. It # provides the ability to generate a syntax tree from source, as well as the # tools necessary to inspect and manipulate that syntax tree. It can be used to diff --git a/lib/syntax_tree/compiler.rb b/lib/syntax_tree/compiler.rb new file mode 100644 index 00000000..d9b7e787 --- /dev/null +++ b/lib/syntax_tree/compiler.rb @@ -0,0 +1,2737 @@ +# frozen_string_literal: true + +module SyntaxTree + # This class is an experiment in transforming Syntax Tree nodes into their + # corresponding YARV instruction sequences. It attempts to mirror the + # behavior of RubyVM::InstructionSequence.compile. + # + # You use this as with any other visitor. 
First you parse code into a tree, + # then you visit it with this compiler. Visiting the root node of the tree + # will return a SyntaxTree::Compiler::InstructionSequence object. + # With that object you can call #to_a on it, which will return a serialized + # form of the instruction sequence as an array. This array _should_ mirror + # the array given by RubyVM::InstructionSequence#to_a. + # + # As an example, here is how you would compile a single expression: + # + # program = SyntaxTree.parse("1 + 2") + # program.accept(SyntaxTree::Compiler.new).to_a + # + # [ + # "YARVInstructionSequence/SimpleDataFormat", + # 3, + # 1, + # 1, + # {:arg_size=>0, :local_size=>0, :stack_max=>2}, + # "", + # "", + # "", + # 1, + # :top, + # [], + # {}, + # [], + # [ + # [:putobject_INT2FIX_1_], + # [:putobject, 2], + # [:opt_plus, {:mid=>:+, :flag=>16, :orig_argc=>1}], + # [:leave] + # ] + # ] + # + # Note that this is the same output as calling: + # + # RubyVM::InstructionSequence.compile("1 + 2").to_a + # + class Compiler < BasicVisitor + # This visitor is responsible for converting Syntax Tree nodes into their + # corresponding Ruby structures. This is used to convert the operands of + # some instructions like putobject that push a Ruby object directly onto + # the stack. It is only used when the entire structure can be represented + # at compile-time, as opposed to constructed at run-time. + class RubyVisitor < BasicVisitor + # This error is raised whenever a node cannot be converted into a Ruby + # object at compile-time. + class CompilationError < StandardError + end + + # This will attempt to compile the given node. If it's possible, then + # it will return the compiled object. Otherwise it will return nil.
+ def self.compile(node) + node.accept(new) + rescue CompilationError + end + + def visit_array(node) + node.contents ? visit_all(node.contents.parts) : [] + end + + def visit_bare_assoc_hash(node) + node.assocs.to_h do |assoc| + # We can only convert regular key-value pairs. A double splat ** + # operator means it has to be converted at run-time. + raise CompilationError unless assoc.is_a?(Assoc) + [visit(assoc.key), visit(assoc.value)] + end + end + + def visit_float(node) + node.value.to_f + end + + alias visit_hash visit_bare_assoc_hash + + def visit_imaginary(node) + node.value.to_c + end + + def visit_int(node) + node.value.to_i + end + + def visit_label(node) + node.value.chomp(":").to_sym + end + + def visit_mrhs(node) + visit_all(node.parts) + end + + def visit_qsymbols(node) + node.elements.map { |element| visit(element).to_sym } + end + + def visit_qwords(node) + visit_all(node.elements) + end + + def visit_range(node) + left, right = [visit(node.left), visit(node.right)] + node.operator.value === ".." ? left..right : left...right + end + + def visit_rational(node) + node.value.to_r + end + + def visit_regexp_literal(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + Regexp.new(node.parts.first.value, visit_regexp_literal_flags(node)) + else + # Any interpolation of expressions or variables will result in the + # regular expression being constructed at run-time. + raise CompilationError + end + end + + # This isn't actually a visit method, though maybe it should be. It is + # responsible for converting the set of string options on a regular + # expression into its equivalent integer.
+ def visit_regexp_literal_flags(node) + node + .options + .chars + .inject(0) do |accum, option| + accum | + case option + when "i" + Regexp::IGNORECASE + when "x" + Regexp::EXTENDED + when "m" + Regexp::MULTILINE + else + raise "Unknown regexp option: #{option}" + end + end + end + + def visit_symbol_literal(node) + node.value.value.to_sym + end + + def visit_symbols(node) + node.elements.map { |element| visit(element).to_sym } + end + + def visit_tstring_content(node) + node.value + end + + def visit_word(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + node.parts.first.value + else + # Any interpolation of expressions or variables will result in the + # string being constructed at run-time. + raise CompilationError + end + end + + def visit_words(node) + visit_all(node.elements) + end + + def visit_unsupported(_node) + raise CompilationError + end + + # Please forgive the metaprogramming here. This is used to create visit + # methods for every node that we did not explicitly handle. By default + # each of these methods will raise a CompilationError. + handled = instance_methods(false) + (Visitor.instance_methods(false) - handled).each do |method| + alias_method method, :visit_unsupported + end + end + + # This object is used to track the size of the stack at any given time. It + # is effectively a mini symbolic interpreter. It's necessary because when + # instruction sequences get serialized they include a :stack_max field on + # them. This field is used to determine how much stack space to allocate + # for the instruction sequence. + class Stack + attr_reader :current_size, :maximum_size + + def initialize + @current_size = 0 + @maximum_size = 0 + end + + def change_by(value) + @current_size += value + @maximum_size = @current_size if @current_size > @maximum_size + end + end + + # This represents every local variable associated with an instruction + # sequence. 
There are two kinds of locals: plain locals that are what you + # expect, and block proxy locals, which represent local variables + # associated with blocks that were passed into the current instruction + # sequence. + class LocalTable + # A local representing a block passed into the current instruction + # sequence. + class BlockLocal + attr_reader :name + + def initialize(name) + @name = name + end + end + + # A regular local variable. + class PlainLocal + attr_reader :name + + def initialize(name) + @name = name + end + end + + # The result of looking up a local variable in the current local table. + class Lookup + attr_reader :local, :index, :level + + def initialize(local, index, level) + @local = local + @index = index + @level = level + end + end + + attr_reader :locals + + def initialize + @locals = [] + end + + def find(name, level) + index = locals.index { |local| local.name == name } + Lookup.new(locals[index], index, level) if index + end + + def has?(name) + locals.any? { |local| local.name == name } + end + + def names + locals.map(&:name) + end + + def size + locals.length + end + + # Add a BlockLocal to the local table. + def block(name) + locals << BlockLocal.new(name) unless has?(name) + end + + # Add a PlainLocal to the local table. + def plain(name) + locals << PlainLocal.new(name) unless has?(name) + end + + # This is the offset from the top of the stack where this local variable + # lives. + def offset(index) + size - (index - 3) - 1 + end + end + + # This class is meant to mirror RubyVM::InstructionSequence. It contains a + # list of instructions along with the metadata pertaining to them. It also + # functions as a builder for the instruction sequence. + class InstructionSequence + MAGIC = "YARVInstructionSequence/SimpleDataFormat" + + # This provides a handle to the rb_iseq_load function, which allows you to + # pass a serialized iseq to Ruby and have it return a + # RubyVM::InstructionSequence object. 
+ ISEQ_LOAD = + Fiddle::Function.new( + Fiddle::Handle::DEFAULT["rb_iseq_load"], + [Fiddle::TYPE_VOIDP] * 3, + Fiddle::TYPE_VOIDP + ) + + # The type of the instruction sequence. + attr_reader :type + + # The name of the instruction sequence. + attr_reader :name + + # The parent instruction sequence, if there is one. + attr_reader :parent_iseq + + # The location of the root node of this instruction sequence. + attr_reader :location + + # This is the list of information about the arguments to this + # instruction sequence. + attr_accessor :argument_size + attr_reader :argument_options + + # The list of instructions for this instruction sequence. + attr_reader :insns + + # The table of local variables. + attr_reader :local_table + + # The hash of names of instance and class variables pointing to the + # index of their associated inline storage. + attr_reader :inline_storages + + # The index of the next inline storage that will be created. + attr_reader :storage_index + + # An object that will track the current size of the stack and the + # maximum size of the stack for this instruction sequence. + attr_reader :stack + + def initialize(type, name, parent_iseq, location) + @type = type + @name = name + @parent_iseq = parent_iseq + @location = location + + @argument_size = 0 + @argument_options = {} + + @local_table = LocalTable.new + @inline_storages = {} + @insns = [] + @storage_index = 0 + @stack = Stack.new + end + + def local_variable(name, level = 0) + if (lookup = local_table.find(name, level)) + lookup + elsif parent_iseq + parent_iseq.local_variable(name, level + 1) + end + end + + def push(insn) + insns << insn + insn + end + + def inline_storage + storage = storage_index + @storage_index += 1 + storage + end + + def inline_storage_for(name) + unless inline_storages.key?(name) + inline_storages[name] = inline_storage + end + + inline_storages[name] + end + + def length + insns.inject(0) do |sum, insn| + insn.is_a?(Array) ? 
sum + insn.length : sum + end + end + + def each_child + insns.grep(Array).each do |insn| + insn[1..].each do |operand| + yield operand if operand.is_a?(InstructionSequence) + end + end + end + + def eval + compiled = to_a + + # Temporary hack until we get these working. + compiled[4][:node_id] = 11 + compiled[4][:node_ids] = [1, 0, 3, 2, 6, 7, 9, -1] + + Fiddle.dlunwrap(ISEQ_LOAD.call(Fiddle.dlwrap(compiled), 0, nil)).eval + end + + def to_a + versions = RUBY_VERSION.split(".").map(&:to_i) + + [ + MAGIC, + versions[0], + versions[1], + 1, + { + arg_size: argument_size, + local_size: local_table.size, + stack_max: stack.maximum_size + }, + name, + "", + "", + location.start_line, + type, + local_table.names, + argument_options, + [], + insns.map { |insn| serialize(insn) } + ] + end + + private + + def serialize(insn) + case insn[0] + when :checkkeyword, :getblockparam, :getblockparamproxy, + :getlocal_WC_0, :getlocal_WC_1, :getlocal, :setlocal_WC_0, + :setlocal_WC_1, :setlocal + iseq = self + + case insn[0] + when :getlocal_WC_1, :setlocal_WC_1 + iseq = iseq.parent_iseq + when :getblockparam, :getblockparamproxy, :getlocal, :setlocal + insn[2].times { iseq = iseq.parent_iseq } + end + + # Here we need to map the local variable index to the offset + # from the top of the stack where it will be stored. + [insn[0], iseq.local_table.offset(insn[1]), *insn[2..]] + when :defineclass + [insn[0], insn[1], insn[2].to_a, insn[3]] + when :definemethod, :definesmethod + [insn[0], insn[1], insn[2].to_a] + when :send, :invokesuper + # For any instructions that push instruction sequences onto the + # stack, we need to call #to_a on them as well. + [insn[0], insn[1], (insn[2].to_a if insn[2])] + when :once + [insn[0], insn[1].to_a, insn[2]] + else + insn + end + end + end + + # This class serves as a layer of indirection between the instruction + # sequence and the compiler. It allows us to provide different behavior + # for certain instructions depending on the Ruby version.
For example, + # class variable reads and writes gained an inline cache in Ruby 3.0. So + # we place the logic for checking the Ruby version in this class. + class Builder + attr_reader :iseq, :stack + attr_reader :frozen_string_literal, + :operands_unification, + :specialized_instruction + + def initialize( + iseq, + frozen_string_literal: false, + operands_unification: true, + specialized_instruction: true + ) + @iseq = iseq + @stack = iseq.stack + + @frozen_string_literal = frozen_string_literal + @operands_unification = operands_unification + @specialized_instruction = specialized_instruction + end + + # This creates a new label at the current length of the instruction + # sequence. It is used as the operand for jump instructions. + def label + name = :"label_#{iseq.length}" + iseq.insns.last == name ? name : event(name) + end + + def event(name) + iseq.push(name) + name + end + + def adjuststack(number) + stack.change_by(-number) + iseq.push([:adjuststack, number]) + end + + def anytostring + stack.change_by(-2 + 1) + iseq.push([:anytostring]) + end + + def branchif(index) + stack.change_by(-1) + iseq.push([:branchif, index]) + end + + def branchnil(index) + stack.change_by(-1) + iseq.push([:branchnil, index]) + end + + def branchunless(index) + stack.change_by(-1) + iseq.push([:branchunless, index]) + end + + def checkkeyword(index, keyword_index) + stack.change_by(+1) + iseq.push([:checkkeyword, index, keyword_index]) + end + + def concatarray + stack.change_by(-2 + 1) + iseq.push([:concatarray]) + end + + def concatstrings(number) + stack.change_by(-number + 1) + iseq.push([:concatstrings, number]) + end + + def defined(type, name, message) + stack.change_by(-1 + 1) + iseq.push([:defined, type, name, message]) + end + + def defineclass(name, class_iseq, flags) + stack.change_by(-2 + 1) + iseq.push([:defineclass, name, class_iseq, flags]) + end + + def definemethod(name, method_iseq) + stack.change_by(0) + iseq.push([:definemethod, name, method_iseq]) + end 
+ + def definesmethod(name, method_iseq) + stack.change_by(-1) + iseq.push([:definesmethod, name, method_iseq]) + end + + def dup + stack.change_by(-1 + 2) + iseq.push([:dup]) + end + + def duparray(object) + stack.change_by(+1) + iseq.push([:duparray, object]) + end + + def duphash(object) + stack.change_by(+1) + iseq.push([:duphash, object]) + end + + def dupn(number) + stack.change_by(+number) + iseq.push([:dupn, number]) + end + + def expandarray(length, flag) + stack.change_by(-1 + length) + iseq.push([:expandarray, length, flag]) + end + + def getblockparam(index, level) + stack.change_by(+1) + iseq.push([:getblockparam, index, level]) + end + + def getblockparamproxy(index, level) + stack.change_by(+1) + iseq.push([:getblockparamproxy, index, level]) + end + + def getclassvariable(name) + stack.change_by(+1) + + if RUBY_VERSION >= "3.0" + iseq.push([:getclassvariable, name, iseq.inline_storage_for(name)]) + else + iseq.push([:getclassvariable, name]) + end + end + + def getconstant(name) + stack.change_by(-2 + 1) + iseq.push([:getconstant, name]) + end + + def getglobal(name) + stack.change_by(+1) + iseq.push([:getglobal, name]) + end + + def getinstancevariable(name) + stack.change_by(+1) + + if RUBY_VERSION >= "3.2" + iseq.push([:getinstancevariable, name, iseq.inline_storage]) + else + inline_storage = iseq.inline_storage_for(name) + iseq.push([:getinstancevariable, name, inline_storage]) + end + end + + def getlocal(index, level) + stack.change_by(+1) + + if operands_unification + # Specialize the getlocal instruction based on the level of the + # local variable. If it's 0 or 1, then there's a specialized + # instruction that will look at the current scope or the parent + # scope, respectively, and requires fewer operands. 
+ case level + when 0 + iseq.push([:getlocal_WC_0, index]) + when 1 + iseq.push([:getlocal_WC_1, index]) + else + iseq.push([:getlocal, index, level]) + end + else + iseq.push([:getlocal, index, level]) + end + end + + def getspecial(key, type) + stack.change_by(-0 + 1) + iseq.push([:getspecial, key, type]) + end + + def intern + stack.change_by(-1 + 1) + iseq.push([:intern]) + end + + def invokeblock(method_id, argc, flag) + stack.change_by(-argc + 1) + iseq.push([:invokeblock, call_data(method_id, argc, flag)]) + end + + def invokesuper(method_id, argc, flag, block_iseq) + stack.change_by(-(argc + 1) + 1) + + cdata = call_data(method_id, argc, flag) + iseq.push([:invokesuper, cdata, block_iseq]) + end + + def jump(index) + stack.change_by(0) + iseq.push([:jump, index]) + end + + def leave + stack.change_by(-1) + iseq.push([:leave]) + end + + def newarray(length) + stack.change_by(-length + 1) + iseq.push([:newarray, length]) + end + + def newhash(length) + stack.change_by(-length + 1) + iseq.push([:newhash, length]) + end + + def newrange(flag) + stack.change_by(-2 + 1) + iseq.push([:newrange, flag]) + end + + def nop + stack.change_by(0) + iseq.push([:nop]) + end + + def objtostring(method_id, argc, flag) + stack.change_by(-1 + 1) + iseq.push([:objtostring, call_data(method_id, argc, flag)]) + end + + def once(postexe_iseq, inline_storage) + stack.change_by(+1) + iseq.push([:once, postexe_iseq, inline_storage]) + end + + def opt_getconstant_path(names) + if RUBY_VERSION >= "3.2" + stack.change_by(+1) + iseq.push([:opt_getconstant_path, names]) + else + inline_storage = iseq.inline_storage + getinlinecache = opt_getinlinecache(-1, inline_storage) + + if names[0] == :"" + names.shift + pop + putobject(Object) + end + + names.each_with_index do |name, index| + putobject(index == 0) + getconstant(name) + end + + opt_setinlinecache(inline_storage) + getinlinecache[1] = label + end + end + + def opt_getinlinecache(offset, inline_storage) + stack.change_by(+1) + 
iseq.push([:opt_getinlinecache, offset, inline_storage]) + end + + def opt_newarray_max(length) + if specialized_instruction + stack.change_by(-length + 1) + iseq.push([:opt_newarray_max, length]) + else + newarray(length) + send(:max, 0, VM_CALL_ARGS_SIMPLE) + end + end + + def opt_newarray_min(length) + if specialized_instruction + stack.change_by(-length + 1) + iseq.push([:opt_newarray_min, length]) + else + newarray(length) + send(:min, 0, VM_CALL_ARGS_SIMPLE) + end + end + + def opt_setinlinecache(inline_storage) + stack.change_by(-1 + 1) + iseq.push([:opt_setinlinecache, inline_storage]) + end + + def opt_str_freeze(value) + if specialized_instruction + stack.change_by(+1) + iseq.push( + [ + :opt_str_freeze, + value, + call_data(:freeze, 0, VM_CALL_ARGS_SIMPLE) + ] + ) + else + putstring(value) + send(:freeze, 0, VM_CALL_ARGS_SIMPLE) + end + end + + def opt_str_uminus(value) + if specialized_instruction + stack.change_by(+1) + iseq.push( + [:opt_str_uminus, value, call_data(:-@, 0, VM_CALL_ARGS_SIMPLE)] + ) + else + putstring(value) + send(:-@, 0, VM_CALL_ARGS_SIMPLE) + end + end + + def pop + stack.change_by(-1) + iseq.push([:pop]) + end + + def putnil + stack.change_by(+1) + iseq.push([:putnil]) + end + + def putobject(object) + stack.change_by(+1) + + if operands_unification + # Specialize the putobject instruction based on the value of the + # object. If it's 0 or 1, then there's a specialized instruction + # that will push the object onto the stack and requires fewer + # operands. 
+ if object.eql?(0) + iseq.push([:putobject_INT2FIX_0_]) + elsif object.eql?(1) + iseq.push([:putobject_INT2FIX_1_]) + else + iseq.push([:putobject, object]) + end + else + iseq.push([:putobject, object]) + end + end + + def putself + stack.change_by(+1) + iseq.push([:putself]) + end + + def putspecialobject(object) + stack.change_by(+1) + iseq.push([:putspecialobject, object]) + end + + def putstring(object) + stack.change_by(+1) + iseq.push([:putstring, object]) + end + + def send(method_id, argc, flag, block_iseq = nil) + stack.change_by(-(argc + 1) + 1) + cdata = call_data(method_id, argc, flag) + + if specialized_instruction + # Specialize the send instruction. If it doesn't have a block + # attached, then we will replace it with an opt_send_without_block + # and do further specializations based on the called method and the + # number of arguments. + + # stree-ignore + if !block_iseq && (flag & VM_CALL_ARGS_BLOCKARG) == 0 + case [method_id, argc] + when [:length, 0] then iseq.push([:opt_length, cdata]) + when [:size, 0] then iseq.push([:opt_size, cdata]) + when [:empty?, 0] then iseq.push([:opt_empty_p, cdata]) + when [:nil?, 0] then iseq.push([:opt_nil_p, cdata]) + when [:succ, 0] then iseq.push([:opt_succ, cdata]) + when [:!, 0] then iseq.push([:opt_not, cdata]) + when [:+, 1] then iseq.push([:opt_plus, cdata]) + when [:-, 1] then iseq.push([:opt_minus, cdata]) + when [:*, 1] then iseq.push([:opt_mult, cdata]) + when [:/, 1] then iseq.push([:opt_div, cdata]) + when [:%, 1] then iseq.push([:opt_mod, cdata]) + when [:==, 1] then iseq.push([:opt_eq, cdata]) + when [:=~, 1] then iseq.push([:opt_regexpmatch2, cdata]) + when [:<, 1] then iseq.push([:opt_lt, cdata]) + when [:<=, 1] then iseq.push([:opt_le, cdata]) + when [:>, 1] then iseq.push([:opt_gt, cdata]) + when [:>=, 1] then iseq.push([:opt_ge, cdata]) + when [:<<, 1] then iseq.push([:opt_ltlt, cdata]) + when [:[], 1] then iseq.push([:opt_aref, cdata]) + when [:&, 1] then iseq.push([:opt_and, cdata]) + when 
[:|, 1] then iseq.push([:opt_or, cdata]) + when [:[]=, 2] then iseq.push([:opt_aset, cdata]) + when [:!=, 1] + eql_data = call_data(:==, 1, VM_CALL_ARGS_SIMPLE) + iseq.push([:opt_neq, eql_data, cdata]) + else + iseq.push([:opt_send_without_block, cdata]) + end + else + iseq.push([:send, cdata, block_iseq]) + end + else + iseq.push([:send, cdata, block_iseq]) + end + end + + def setclassvariable(name) + stack.change_by(-1) + + if RUBY_VERSION >= "3.0" + iseq.push([:setclassvariable, name, iseq.inline_storage_for(name)]) + else + iseq.push([:setclassvariable, name]) + end + end + + def setconstant(name) + stack.change_by(-2) + iseq.push([:setconstant, name]) + end + + def setglobal(name) + stack.change_by(-1) + iseq.push([:setglobal, name]) + end + + def setinstancevariable(name) + stack.change_by(-1) + + if RUBY_VERSION >= "3.2" + iseq.push([:setinstancevariable, name, iseq.inline_storage]) + else + inline_storage = iseq.inline_storage_for(name) + iseq.push([:setinstancevariable, name, inline_storage]) + end + end + + def setlocal(index, level) + stack.change_by(-1) + + if operands_unification + # Specialize the setlocal instruction based on the level of the + # local variable. If it's 0 or 1, then there's a specialized + # instruction that will write to the current scope or the parent + # scope, respectively, and requires fewer operands. 
+ case level + when 0 + iseq.push([:setlocal_WC_0, index]) + when 1 + iseq.push([:setlocal_WC_1, index]) + else + iseq.push([:setlocal, index, level]) + end + else + iseq.push([:setlocal, index, level]) + end + end + + def setn(number) + stack.change_by(-1 + 1) + iseq.push([:setn, number]) + end + + def splatarray(flag) + stack.change_by(-1 + 1) + iseq.push([:splatarray, flag]) + end + + def swap + stack.change_by(-2 + 2) + iseq.push([:swap]) + end + + def topn(number) + stack.change_by(+1) + iseq.push([:topn, number]) + end + + def toregexp(options, length) + stack.change_by(-length + 1) + iseq.push([:toregexp, options, length]) + end + + private + + # This creates a call data object that is used as the operand for the + # send, invokesuper, and objtostring instructions. + def call_data(method_id, argc, flag) + { mid: method_id, flag: flag, orig_argc: argc } + end + end + + # These constants correspond to the putspecialobject instruction. They are + # used to represent special objects that are pushed onto the stack. + VM_SPECIAL_OBJECT_VMCORE = 1 + VM_SPECIAL_OBJECT_CBASE = 2 + VM_SPECIAL_OBJECT_CONST_BASE = 3 + + # These constants correspond to the flag passed as part of the call data + # structure on the send instruction. They are used to represent various + # metadata about the callsite (e.g., were keyword arguments used?, was a + # block given?, etc.). + VM_CALL_ARGS_SPLAT = 1 << 0 + VM_CALL_ARGS_BLOCKARG = 1 << 1 + VM_CALL_FCALL = 1 << 2 + VM_CALL_VCALL = 1 << 3 + VM_CALL_ARGS_SIMPLE = 1 << 4 + VM_CALL_BLOCKISEQ = 1 << 5 + VM_CALL_KWARG = 1 << 6 + VM_CALL_KW_SPLAT = 1 << 7 + VM_CALL_TAILCALL = 1 << 8 + VM_CALL_SUPER = 1 << 9 + VM_CALL_ZSUPER = 1 << 10 + VM_CALL_OPT_SEND = 1 << 11 + VM_CALL_KW_SPLAT_MUT = 1 << 12 + + # These constants correspond to the value passed as part of the defined + # instruction. It's an enum defined in the CRuby codebase that tells that + # instruction what kind of defined check to perform. 
+ DEFINED_NIL = 1 + DEFINED_IVAR = 2 + DEFINED_LVAR = 3 + DEFINED_GVAR = 4 + DEFINED_CVAR = 5 + DEFINED_CONST = 6 + DEFINED_METHOD = 7 + DEFINED_YIELD = 8 + DEFINED_ZSUPER = 9 + DEFINED_SELF = 10 + DEFINED_TRUE = 11 + DEFINED_FALSE = 12 + DEFINED_ASGN = 13 + DEFINED_EXPR = 14 + DEFINED_REF = 15 + DEFINED_FUNC = 16 + DEFINED_CONST_FROM = 17 + + # These constants correspond to the value passed in the flags as part of + # the defineclass instruction. + VM_DEFINECLASS_TYPE_CLASS = 0 + VM_DEFINECLASS_TYPE_SINGLETON_CLASS = 1 + VM_DEFINECLASS_TYPE_MODULE = 2 + VM_DEFINECLASS_FLAG_SCOPED = 8 + VM_DEFINECLASS_FLAG_HAS_SUPERCLASS = 16 + + # These options mirror the compilation options that we currently support + # that can be also passed to RubyVM::InstructionSequence.compile. + attr_reader :frozen_string_literal, + :operands_unification, + :specialized_instruction + + # The current instruction sequence that is being compiled. + attr_reader :current_iseq + + # This is the current builder that is being used to construct the current + # instruction sequence. + attr_reader :builder + + # A boolean to track if we're currently compiling the last statement + # within a set of statements. This information is necessary to determine + # if we need to return the value of the last statement. 
+ attr_reader :last_statement + + def initialize( + frozen_string_literal: false, + operands_unification: true, + specialized_instruction: true + ) + @frozen_string_literal = frozen_string_literal + @operands_unification = operands_unification + @specialized_instruction = specialized_instruction + + @current_iseq = nil + @builder = nil + @last_statement = false + end + + def visit_BEGIN(node) + visit(node.statements) + end + + def visit_CHAR(node) + if frozen_string_literal + builder.putobject(node.value[1..]) + else + builder.putstring(node.value[1..]) + end + end + + def visit_END(node) + name = "block in #{current_iseq.name}" + once_iseq = + with_instruction_sequence(:block, name, current_iseq, node) do + postexe_iseq = + with_instruction_sequence(:block, name, current_iseq, node) do + *statements, last_statement = node.statements.body + visit_all(statements) + with_last_statement { visit(last_statement) } + builder.leave + end + + builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) + builder.send(:"core#set_postexe", 0, VM_CALL_FCALL, postexe_iseq) + builder.leave + end + + builder.once(once_iseq, current_iseq.inline_storage) + builder.pop + end + + def visit_alias(node) + builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) + builder.putspecialobject(VM_SPECIAL_OBJECT_CBASE) + visit(node.left) + visit(node.right) + builder.send(:"core#set_method_alias", 3, VM_CALL_ARGS_SIMPLE) + end + + def visit_aref(node) + visit(node.collection) + visit(node.index) + builder.send(:[], 1, VM_CALL_ARGS_SIMPLE) + end + + def visit_arg_block(node) + visit(node.value) + end + + def visit_arg_paren(node) + visit(node.arguments) + end + + def visit_arg_star(node) + visit(node.value) + builder.splatarray(false) + end + + def visit_args(node) + visit_all(node.parts) + end + + def visit_array(node) + if (compiled = RubyVisitor.compile(node)) + builder.duparray(compiled) + else + length = 0 + + node.contents.parts.each do |part| + if part.is_a?(ArgStar) + if length > 0 + 
builder.newarray(length) + length = 0 + end + + visit(part.value) + builder.concatarray + else + visit(part) + length += 1 + end + end + + builder.newarray(length) if length > 0 + if length > 0 && length != node.contents.parts.length + builder.concatarray + end + end + end + + def visit_assign(node) + case node.target + when ARefField + builder.putnil + visit(node.target.collection) + visit(node.target.index) + visit(node.value) + builder.setn(3) + builder.send(:[]=, 2, VM_CALL_ARGS_SIMPLE) + builder.pop + when ConstPathField + names = constant_names(node.target) + name = names.pop + + if RUBY_VERSION >= "3.2" + builder.opt_getconstant_path(names) + visit(node.value) + builder.swap + builder.topn(1) + builder.swap + builder.setconstant(name) + else + visit(node.value) + builder.dup if last_statement? + builder.opt_getconstant_path(names) + builder.setconstant(name) + end + when Field + builder.putnil + visit(node.target) + visit(node.value) + builder.setn(2) + builder.send(:"#{node.target.name.value}=", 1, VM_CALL_ARGS_SIMPLE) + builder.pop + when TopConstField + name = node.target.constant.value.to_sym + + if RUBY_VERSION >= "3.2" + builder.putobject(Object) + visit(node.value) + builder.swap + builder.topn(1) + builder.swap + builder.setconstant(name) + else + visit(node.value) + builder.dup if last_statement? + builder.putobject(Object) + builder.setconstant(name) + end + when VarField + visit(node.value) + builder.dup if last_statement? 
+ + case node.target.value + when Const + builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) + builder.setconstant(node.target.value.value.to_sym) + when CVar + builder.setclassvariable(node.target.value.value.to_sym) + when GVar + builder.setglobal(node.target.value.value.to_sym) + when Ident + local_variable = visit(node.target) + builder.setlocal(local_variable.index, local_variable.level) + when IVar + builder.setinstancevariable(node.target.value.value.to_sym) + end + end + end + + def visit_assoc(node) + visit(node.key) + visit(node.value) + end + + def visit_assoc_splat(node) + visit(node.value) + end + + def visit_backref(node) + builder.getspecial(1, 2 * node.value[1..].to_i) + end + + def visit_bare_assoc_hash(node) + if (compiled = RubyVisitor.compile(node)) + builder.duphash(compiled) + else + visit_all(node.assocs) + end + end + + def visit_binary(node) + case node.operator + when :"&&" + visit(node.left) + builder.dup + + branchunless = builder.branchunless(-1) + builder.pop + + visit(node.right) + branchunless[1] = builder.label + when :"||" + visit(node.left) + builder.dup + + branchif = builder.branchif(-1) + builder.pop + + visit(node.right) + branchif[1] = builder.label + else + visit(node.left) + visit(node.right) + builder.send(node.operator, 1, VM_CALL_ARGS_SIMPLE) + end + end + + def visit_block(node) + with_instruction_sequence( + :block, + "block in #{current_iseq.name}", + current_iseq, + node + ) do + builder.event(:RUBY_EVENT_B_CALL) + visit(node.block_var) + visit(node.bodystmt) + builder.event(:RUBY_EVENT_B_RETURN) + builder.leave + end + end + + def visit_block_var(node) + params = node.params + + if params.requireds.length == 1 && params.optionals.empty? && + !params.rest && params.posts.empty? && params.keywords.empty? 
&& + !params.keyword_rest && !params.block + current_iseq.argument_options[:ambiguous_param0] = true + end + + visit(node.params) + + node.locals.each do |local| + current_iseq.local_table.plain(local.value.to_sym) + end + end + + def visit_blockarg(node) + current_iseq.argument_options[:block_start] = current_iseq.argument_size + current_iseq.local_table.block(node.name.value.to_sym) + current_iseq.argument_size += 1 + end + + def visit_bodystmt(node) + visit(node.statements) + end + + def visit_call(node) + if node.is_a?(CallNode) + return( + visit_call( + CommandCall.new( + receiver: node.receiver, + operator: node.operator, + message: node.message, + arguments: node.arguments, + block: nil, + location: node.location + ) + ) + ) + end + + arg_parts = argument_parts(node.arguments) + argc = arg_parts.length + + # First we're going to check if we're calling a method on an array + # literal without any arguments. In that case there are some + # specializations we might be able to perform. + if argc == 0 && (node.message.is_a?(Ident) || node.message.is_a?(Op)) + case node.receiver + when ArrayLiteral + parts = node.receiver.contents&.parts || [] + + if parts.none? { |part| part.is_a?(ArgStar) } && + RubyVisitor.compile(node.receiver).nil? + case node.message.value + when "max" + visit(node.receiver.contents) + builder.opt_newarray_max(parts.length) + return + when "min" + visit(node.receiver.contents) + builder.opt_newarray_min(parts.length) + return + end + end + when StringLiteral + if RubyVisitor.compile(node.receiver).nil? 
+ case node.message.value + when "-@" + builder.opt_str_uminus(node.receiver.parts.first.value) + return + when "freeze" + builder.opt_str_freeze(node.receiver.parts.first.value) + return + end + end + end + end + + if node.receiver + if node.receiver.is_a?(VarRef) && + ( + lookup = + current_iseq.local_variable(node.receiver.value.value.to_sym) + ) && lookup.local.is_a?(LocalTable::BlockLocal) + builder.getblockparamproxy(lookup.index, lookup.level) + else + visit(node.receiver) + end + else + builder.putself + end + + branchnil = + if node.operator&.value == "&." + builder.dup + builder.branchnil(-1) + end + + flag = 0 + + arg_parts.each do |arg_part| + case arg_part + when ArgBlock + argc -= 1 + flag |= VM_CALL_ARGS_BLOCKARG + visit(arg_part) + when ArgStar + flag |= VM_CALL_ARGS_SPLAT + visit(arg_part) + when ArgsForward + flag |= VM_CALL_ARGS_SPLAT | VM_CALL_ARGS_BLOCKARG + + lookup = current_iseq.local_table.find(:*, 0) + builder.getlocal(lookup.index, lookup.level) + builder.splatarray(arg_parts.length != 1) + + lookup = current_iseq.local_table.find(:&, 0) + builder.getblockparamproxy(lookup.index, lookup.level) + when BareAssocHash + flag |= VM_CALL_KW_SPLAT + visit(arg_part) + else + visit(arg_part) + end + end + + block_iseq = visit(node.block) if node.block + flag |= VM_CALL_ARGS_SIMPLE if block_iseq.nil? && flag == 0 + flag |= VM_CALL_FCALL if node.receiver.nil? 
+ + builder.send(node.message.value.to_sym, argc, flag, block_iseq) + branchnil[1] = builder.label if branchnil + end + + def visit_case(node) + visit(node.value) if node.value + + clauses = [] + else_clause = nil + + current = node.consequent + + while current + clauses << current + + if (current = current.consequent).is_a?(Else) + else_clause = current + break + end + end + + branches = + clauses.map do |clause| + visit(clause.arguments) + builder.topn(1) + builder.send(:===, 1, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE) + [clause, builder.branchif(:label_00)] + end + + builder.pop + + else_clause ? visit(else_clause) : builder.putnil + + builder.leave + + branches.each_with_index do |(clause, branchif), index| + builder.leave if index != 0 + branchif[1] = builder.label + builder.pop + visit(clause) + end + end + + def visit_class(node) + name = node.constant.constant.value.to_sym + class_iseq = + with_instruction_sequence( + :class, + "", + current_iseq, + node + ) do + builder.event(:RUBY_EVENT_CLASS) + visit(node.bodystmt) + builder.event(:RUBY_EVENT_END) + builder.leave + end + + flags = VM_DEFINECLASS_TYPE_CLASS + + case node.constant + when ConstPathRef + flags |= VM_DEFINECLASS_FLAG_SCOPED + visit(node.constant.parent) + when ConstRef + builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) + when TopConstRef + flags |= VM_DEFINECLASS_FLAG_SCOPED + builder.putobject(Object) + end + + if node.superclass + flags |= VM_DEFINECLASS_FLAG_HAS_SUPERCLASS + visit(node.superclass) + else + builder.putnil + end + + builder.defineclass(name, class_iseq, flags) + end + + def visit_command(node) + visit_call( + CommandCall.new( + receiver: nil, + operator: nil, + message: node.message, + arguments: node.arguments, + block: node.block, + location: node.location + ) + ) + end + + def visit_command_call(node) + visit_call( + CommandCall.new( + receiver: node.receiver, + operator: node.operator, + message: node.message, + arguments: node.arguments, + block: node.block, + 
location: node.location + ) + ) + end + + def visit_const_path_field(node) + visit(node.parent) + end + + def visit_const_path_ref(node) + names = constant_names(node) + builder.opt_getconstant_path(names) + end + + def visit_def(node) + method_iseq = + with_instruction_sequence( + :method, + node.name.value, + current_iseq, + node + ) do + visit(node.params) if node.params + builder.event(:RUBY_EVENT_CALL) + visit(node.bodystmt) + builder.event(:RUBY_EVENT_RETURN) + builder.leave + end + + name = node.name.value.to_sym + + if node.target + visit(node.target) + builder.definesmethod(name, method_iseq) + else + builder.definemethod(name, method_iseq) + end + + builder.putobject(name) + end + + def visit_defined(node) + case node.value + when Assign + # If we're assigning to a local variable, then we need to make sure + # that we put it into the local table. + if node.value.target.is_a?(VarField) && + node.value.target.value.is_a?(Ident) + current_iseq.local_table.plain(node.value.target.value.value.to_sym) + end + + builder.putobject("assignment") + when VarRef + value = node.value.value + name = value.value.to_sym + + case value + when Const + builder.putnil + builder.defined(DEFINED_CONST, name, "constant") + when CVar + builder.putnil + builder.defined(DEFINED_CVAR, name, "class variable") + when GVar + builder.putnil + builder.defined(DEFINED_GVAR, name, "global-variable") + when Ident + builder.putobject("local-variable") + when IVar + builder.putnil + builder.defined(DEFINED_IVAR, name, "instance-variable") + when Kw + case name + when :false + builder.putobject("false") + when :nil + builder.putobject("nil") + when :self + builder.putobject("self") + when :true + builder.putobject("true") + end + end + when VCall + builder.putself + + name = node.value.value.value.to_sym + builder.defined(DEFINED_FUNC, name, "method") + when YieldNode + builder.putnil + builder.defined(DEFINED_YIELD, false, "yield") + when ZSuper + builder.putnil + 
builder.defined(DEFINED_ZSUPER, false, "super") + else + builder.putobject("expression") + end + end + + def visit_dyna_symbol(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + builder.putobject(node.parts.first.value.to_sym) + end + end + + def visit_else(node) + visit(node.statements) + builder.pop unless last_statement? + end + + def visit_elsif(node) + visit_if( + IfNode.new( + predicate: node.predicate, + statements: node.statements, + consequent: node.consequent, + location: node.location + ) + ) + end + + def visit_field(node) + visit(node.parent) + end + + def visit_float(node) + builder.putobject(node.accept(RubyVisitor.new)) + end + + def visit_for(node) + visit(node.collection) + + name = node.index.value.value.to_sym + current_iseq.local_table.plain(name) + + block_iseq = + with_instruction_sequence( + :block, + "block in #{current_iseq.name}", + current_iseq, + node.statements + ) do + current_iseq.argument_options[:lead_num] ||= 0 + current_iseq.argument_options[:lead_num] += 1 + current_iseq.argument_options[:ambiguous_param0] = true + + current_iseq.argument_size += 1 + current_iseq.local_table.plain(2) + + builder.getlocal(0, 0) + + local_variable = current_iseq.local_variable(name) + builder.setlocal(local_variable.index, local_variable.level) + + builder.event(:RUBY_EVENT_B_CALL) + builder.nop + + visit(node.statements) + builder.event(:RUBY_EVENT_B_RETURN) + builder.leave + end + + builder.send(:each, 0, 0, block_iseq) + end + + def visit_hash(node) + builder.duphash(node.accept(RubyVisitor.new)) + rescue RubyVisitor::CompilationError + visit_all(node.assocs) + builder.newhash(node.assocs.length * 2) + end + + def visit_heredoc(node) + if node.beginning.value.end_with?("`") + visit_xstring_literal(node) + elsif node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + visit(node.parts.first) + else + length = visit_string_parts(node) + builder.concatstrings(length) + end + end + + def visit_if(node) + 
visit(node.predicate) + branchunless = builder.branchunless(-1) + visit(node.statements) + + if last_statement? + builder.leave + branchunless[1] = builder.label + + node.consequent ? visit(node.consequent) : builder.putnil + else + builder.pop + + if node.consequent + jump = builder.jump(-1) + branchunless[1] = builder.label + visit(node.consequent) + jump[1] = builder.label + else + branchunless[1] = builder.label + end + end + end + + def visit_if_op(node) + visit_if( + IfNode.new( + predicate: node.predicate, + statements: node.truthy, + consequent: + Else.new( + keyword: Kw.new(value: "else", location: Location.default), + statements: node.falsy, + location: Location.default + ), + location: Location.default + ) + ) + end + + def visit_imaginary(node) + builder.putobject(node.accept(RubyVisitor.new)) + end + + def visit_int(node) + builder.putobject(node.accept(RubyVisitor.new)) + end + + def visit_kwrest_param(node) + current_iseq.argument_options[:kwrest] = current_iseq.argument_size + current_iseq.argument_size += 1 + current_iseq.local_table.plain(node.name.value.to_sym) + end + + def visit_label(node) + builder.putobject(node.accept(RubyVisitor.new)) + end + + def visit_lambda(node) + lambda_iseq = + with_instruction_sequence( + :block, + "block in #{current_iseq.name}", + current_iseq, + node + ) do + builder.event(:RUBY_EVENT_B_CALL) + visit(node.params) + visit(node.statements) + builder.event(:RUBY_EVENT_B_RETURN) + builder.leave + end + + builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) + builder.send(:lambda, 0, VM_CALL_FCALL, lambda_iseq) + end + + def visit_lambda_var(node) + visit_block_var(node) + end + + def visit_massign(node) + visit(node.value) + builder.dup + visit(node.target) + end + + def visit_method_add_block(node) + visit_call( + CommandCall.new( + receiver: node.call.receiver, + operator: node.call.operator, + message: node.call.message, + arguments: node.call.arguments, + block: node.block, + location: node.location + ) + ) + end 
# Compiles the left-hand side of a multiple assignment. Only VarField
# targets are handled: each one is resolved to a local lookup, the array on
# the stack is expanded, and the locals are assigned in order.
def visit_mlhs(node)
  lookups = []

  node.parts.each do |part|
    case part
    when VarField
      lookups << visit(part)
    end
  end

  builder.expandarray(lookups.length, 0)

  lookups.each { |lookup| builder.setlocal(lookup.index, lookup.level) }
end

# Compiles a module definition. The body is compiled into its own
# instruction sequence, then the constant base and a nil superclass are
# pushed before emitting defineclass with the module flags.
def visit_module(node)
  name = node.constant.constant.value.to_sym
  module_iseq =
    with_instruction_sequence(
      :class,
      "",
      current_iseq,
      node
    ) do
      builder.event(:RUBY_EVENT_CLASS)
      visit(node.bodystmt)
      builder.event(:RUBY_EVENT_END)
      builder.leave
    end

  flags = VM_DEFINECLASS_TYPE_MODULE

  case node.constant
  when ConstPathRef
    flags |= VM_DEFINECLASS_FLAG_SCOPED
    visit(node.constant.parent)
  when ConstRef
    builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE)
  when TopConstRef
    flags |= VM_DEFINECLASS_FLAG_SCOPED
    builder.putobject(Object)
  end

  builder.putnil
  builder.defineclass(name, module_iseq, flags)
end

# Compiles the right-hand side of a multiple assignment. If every part can
# be evaluated at compile-time the array is duplicated from a literal,
# otherwise each part is visited and a new array is built.
def visit_mrhs(node)
  if (compiled = RubyVisitor.compile(node))
    builder.duparray(compiled)
  else
    visit_all(node.parts)
    builder.newarray(node.parts.length)
  end
end

# Compiles `not expr` as a call to the ! method on the operand.
def visit_not(node)
  visit(node.statement)
  builder.send(:!, 0, VM_CALL_ARGS_SIMPLE)
end

# Compiles an operator assignment (+=, &&=, ||=, ...). The &&= and ||=
# forms short-circuit with a branch, ||= on constants, class variables and
# globals goes through opassign_defined, and every other operator is a
# read / send / write sequence driven by with_opassign.
def visit_opassign(node)
  flag = VM_CALL_ARGS_SIMPLE
  if node.target.is_a?(ConstPathField) || node.target.is_a?(TopConstField)
    flag |= VM_CALL_FCALL
  end

  case (operator = node.operator.value.chomp("=").to_sym)
  when :"&&"
    branchunless = nil

    with_opassign(node) do
      builder.dup
      branchunless = builder.branchunless(-1)
      builder.pop
      visit(node.value)
    end

    case node.target
    when ARefField
      builder.leave
      branchunless[1] = builder.label
      builder.setn(3)
      builder.adjuststack(3)
    when ConstPathField, TopConstField
      branchunless[1] = builder.label
      builder.swap
      builder.pop
    else
      branchunless[1] = builder.label
    end
  when :"||"
    if node.target.is_a?(ConstPathField) ||
         node.target.is_a?(TopConstField)
      opassign_defined(node)
      builder.swap
      builder.pop
    elsif node.target.is_a?(VarField) &&
          [Const, CVar, GVar].include?(node.target.value.class)
      opassign_defined(node)
    else
      branchif = nil

      with_opassign(node) do
        builder.dup
        branchif = builder.branchif(-1)
        builder.pop
        visit(node.value)
      end

      if node.target.is_a?(ARefField)
        builder.leave
        branchif[1] = builder.label
        builder.setn(3)
        builder.adjuststack(3)
      else
        branchif[1] = builder.label
      end
    end
  else
    with_opassign(node) do
      visit(node.value)
      builder.send(operator, 1, flag)
    end
  end
end

# Registers every kind of parameter on the current instruction sequence:
# each one is added to the local table, counted in argument_size, and
# described in argument_options. Default values that cannot be computed at
# compile-time are compiled inline.
def visit_params(node)
  argument_options = current_iseq.argument_options

  if node.requireds.any?
    argument_options[:lead_num] = 0

    node.requireds.each do |required|
      current_iseq.local_table.plain(required.value.to_sym)
      current_iseq.argument_size += 1
      argument_options[:lead_num] += 1
    end
  end

  node.optionals.each do |(optional, value)|
    index = current_iseq.local_table.size
    name = optional.value.to_sym

    current_iseq.local_table.plain(name)
    current_iseq.argument_size += 1

    # The :opt table holds one label before the first default and one
    # label after each default value.
    unless argument_options.key?(:opt)
      argument_options[:opt] = [builder.label]
    end

    visit(value)
    builder.setlocal(index, 0)
    current_iseq.argument_options[:opt] << builder.label
  end

  visit(node.rest) if node.rest

  if node.posts.any?
    argument_options[:post_start] = current_iseq.argument_size
    argument_options[:post_num] = 0

    node.posts.each do |post|
      current_iseq.local_table.plain(post.value.to_sym)
      current_iseq.argument_size += 1
      argument_options[:post_num] += 1
    end
  end

  if node.keywords.any?
    argument_options[:kwbits] = 0
    argument_options[:keyword] = []
    checkkeywords = []

    node.keywords.each_with_index do |(keyword, value), keyword_index|
      name = keyword.value.chomp(":").to_sym
      index = current_iseq.local_table.size

      current_iseq.local_table.plain(name)
      current_iseq.argument_size += 1
      argument_options[:kwbits] += 1

      if value.nil?
        argument_options[:keyword] << name
      else
        begin
          compiled = value.accept(RubyVisitor.new)
          argument_options[:keyword] << [name, compiled]
        rescue RubyVisitor::CompilationError
          # The default is not a compile-time value, so emit code that
          # assigns it only when the keyword was not supplied.
          argument_options[:keyword] << [name]
          checkkeywords << builder.checkkeyword(-1, keyword_index)
          branchif = builder.branchif(-1)
          visit(value)
          builder.setlocal(index, 0)
          branchif[1] = builder.label
        end
      end
    end

    # An extra local with an integer name is added after the keywords; the
    # checkkeyword instructions are patched to point at its index.
    name = node.keyword_rest ? 3 : 2
    current_iseq.argument_size += 1
    current_iseq.local_table.plain(name)

    lookup = current_iseq.local_table.find(name, 0)
    checkkeywords.each { |checkkeyword| checkkeyword[1] = lookup.index }
  end

  if node.keyword_rest.is_a?(ArgsForward)
    # Argument forwarding (...) is modelled as an anonymous rest parameter
    # plus an anonymous block parameter.
    current_iseq.local_table.plain(:*)
    current_iseq.local_table.plain(:&)

    current_iseq.argument_options[
      :rest_start
    ] = current_iseq.argument_size
    current_iseq.argument_options[
      :block_start
    ] = current_iseq.argument_size + 1

    current_iseq.argument_size += 2
  elsif node.keyword_rest
    visit(node.keyword_rest)
  end

  visit(node.block) if node.block
end

# Parentheses only group, so compile whatever they contain.
def visit_paren(node)
  visit(node.contents)
end

# Compiles the root of the tree into the top-level instruction sequence and
# returns it. Leading comments are scanned for the frozen_string_literal
# magic comment, BEGIN blocks are compiled first, and the final statement
# is compiled in "last statement" mode.
def visit_program(node)
  node.statements.body.each do |statement|
    break unless statement.is_a?(Comment)

    if statement.value == "# frozen_string_literal: true"
      @frozen_string_literal = true
    end
  end

  preexes = []
  statements = []

  node.statements.body.each do |statement|
    case statement
    when Comment, EmbDoc, EndContent, VoidStmt
      # ignore
    when BEGINBlock
      preexes << statement
    else
      statements << statement
    end
  end

  with_instruction_sequence(:top, "", nil, node) do
    visit_all(preexes)

    if statements.empty?
      builder.putnil
    else
      *statements, last_statement = statements
      visit_all(statements)
      with_last_statement { visit(last_statement) }
    end

    builder.leave
  end
end

# %i[] literals are always compile-time constants.
def visit_qsymbols(node)
  builder.duparray(node.accept(RubyVisitor.new))
end

# %w[] literals can only be duplicated from a literal array when string
# literals are frozen; otherwise each element is compiled separately.
def visit_qwords(node)
  if frozen_string_literal
    builder.duparray(node.accept(RubyVisitor.new))
  else
    visit_all(node.elements)
    builder.newarray(node.elements.length)
  end
end

# Compiles a range, pushing it as a literal when both ends are
# compile-time values and building it with newrange otherwise.
def visit_range(node)
  builder.putobject(node.accept(RubyVisitor.new))
rescue RubyVisitor::CompilationError
  visit(node.left)
  visit(node.right)
  builder.newrange(node.operator.value == ".." ? 0 : 1)
end

# Rational literals are pushed as compile-time objects.
def visit_rational(node)
  builder.putobject(node.accept(RubyVisitor.new))
end

# Compiles a regular expression, pushing it as a literal when possible and
# otherwise assembling it from its parts with toregexp.
def visit_regexp_literal(node)
  builder.putobject(node.accept(RubyVisitor.new))
rescue RubyVisitor::CompilationError
  flags = RubyVisitor.new.visit_regexp_literal_flags(node)
  length = visit_string_parts(node)
  builder.toregexp(flags, length)
end

# Registers a *rest parameter on the current instruction sequence.
def visit_rest_param(node)
  current_iseq.local_table.plain(node.name.value.to_sym)
  current_iseq.argument_options[:rest_start] = current_iseq.argument_size
  current_iseq.argument_size += 1
end

# Compiles `class << target`. The target and a nil superclass are pushed,
# the body is compiled into its own instruction sequence, and defineclass
# is emitted with the singleton class type.
def visit_sclass(node)
  visit(node.target)
  builder.putnil

  singleton_iseq =
    with_instruction_sequence(
      :class,
      "singleton class",
      current_iseq,
      node
    ) do
      builder.event(:RUBY_EVENT_CLASS)
      visit(node.bodystmt)
      builder.event(:RUBY_EVENT_END)
      builder.leave
    end

  builder.defineclass(
    :singletonclass,
    singleton_iseq,
    VM_DEFINECLASS_TYPE_SINGLETON_CLASS
  )
end

# Compiles a list of statements, skipping nodes that produce no code. An
# empty list compiles to nil.
def visit_statements(node)
  statements =
    node.body.select do |statement|
      case statement
      when Comment, EmbDoc, EndContent, VoidStmt
        false
      else
        true
      end
    end

  statements.empty? ? builder.putnil : visit_all(statements)
end

# Compiles adjacent string literals ("a" "b") by joining the first part of
# each side into a single synthetic string literal.
def visit_string_concat(node)
  value = node.left.parts.first.value + node.right.parts.first.value
  content = TStringContent.new(value: value, location: node.location)

  literal =
    StringLiteral.new(
      parts: [content],
      quote: node.left.quote,
      location: node.location
    )
  visit_string_literal(literal)
end

# Compiles the statements inside a #{} interpolation.
def visit_string_embexpr(node)
  visit(node.statements)
end

# Compiles a string literal. A single plain part is pushed directly;
# anything else is pushed part by part and joined with concatstrings.
def visit_string_literal(node)
  if node.parts.length == 1 && node.parts.first.is_a?(TStringContent)
    visit(node.parts.first)
  else
    length = visit_string_parts(node)
    builder.concatstrings(length)
  end
end

# Compiles `super` with explicit arguments.
def visit_super(node)
  builder.putself
  visit(node.arguments)
  builder.invokesuper(
    nil,
    argument_parts(node.arguments).length,
    VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE | VM_CALL_SUPER,
    nil
  )
end

# Symbol literals are pushed as compile-time objects.
def visit_symbol_literal(node)
  builder.putobject(node.accept(RubyVisitor.new))
end

# Compiles a %I[] literal. When every element is static the array is
# duplicated from a literal; otherwise each element is built (and interned
# if it contains interpolation) and collected with newarray.
def visit_symbols(node)
  builder.duparray(node.accept(RubyVisitor.new))
rescue RubyVisitor::CompilationError
  node.elements.each do |element|
    if element.parts.length == 1 &&
         element.parts.first.is_a?(TStringContent)
      builder.putobject(element.parts.first.value.to_sym)
    else
      length = visit_string_parts(element)
      builder.concatstrings(length)
      builder.intern
    end
  end

  builder.newarray(node.elements.length)
end

# Compiles a top-level constant reference (::Foo).
def visit_top_const_ref(node)
  builder.opt_getconstant_path(constant_names(node))
end

# Compiles plain string content, pushing the shared object when string
# literals are frozen and a fresh string otherwise.
def visit_tstring_content(node)
  if frozen_string_literal
    builder.putobject(node.accept(RubyVisitor.new))
  else
    builder.putstring(node.accept(RubyVisitor.new))
  end
end

# Compiles a unary operator as a method call on the operand. Unary plus
# and minus map to the +@ and -@ methods.
def visit_unary(node)
  method_id =
    case node.operator
    when "+", "-"
      "#{node.operator}@"
    else
      node.operator
    end

  visit_call(
    CommandCall.new(
      receiver: node.statement,
      operator: nil,
      message: Ident.new(value: method_id, location: Location.default),
      arguments: nil,
      block: nil,
      location: Location.default
    )
  )
end

# Compiles `undef a, b` as one core#undef_method call per symbol, popping
# the result of each call except the last.
def visit_undef(node)
  node.symbols.each_with_index do |symbol, index|
    builder.pop if index != 0
    builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE)
    builder.putspecialobject(VM_SPECIAL_OBJECT_CBASE)
    visit(symbol)
    builder.send(:"core#undef_method", 2, VM_CALL_ARGS_SIMPLE)
  end
end

# Compiles an `unless` expression. The branchunless instruction jumps when
# the predicate is falsy, which for `unless` means jumping to the body;
# the fall-through path is the else clause.
def visit_unless(node)
  visit(node.predicate)
  branchunless = builder.branchunless(-1)

  if last_statement?
    node.consequent ? visit(node.consequent) : builder.putnil
    builder.leave
    branchunless[1] = builder.label

    visit(node.statements)
  else
    # The value is unused here. The else clause discards its own value
    # (see visit_else), so only the body needs an explicit pop. Both paths
    # meet at the label after the body.
    visit(node.consequent) if node.consequent
    jump = builder.jump(-1)
    branchunless[1] = builder.label

    visit(node.statements)
    builder.pop
    jump[1] = builder.label
  end
end

# Compiles an `until` loop: jump to the predicate, run the body, and loop
# back to the body while the predicate is falsy.
def visit_until(node)
  jumps = []

  jumps << builder.jump(-1)
  builder.putnil
  builder.pop
  jumps << builder.jump(-1)

  label = builder.label
  visit(node.statements)
  builder.pop
  # Both forward jumps land on the predicate check.
  jumps.each { |jump| jump[1] = builder.label }

  visit(node.predicate)
  builder.branchunless(label)
  builder.putnil if last_statement?
end

# Resolves an assignment target. Class and instance variables reserve
# inline storage; identifiers return their local variable lookup, adding
# the local to the table first if it is not yet known.
def visit_var_field(node)
  case node.value
  when CVar, IVar
    name = node.value.value.to_sym
    current_iseq.inline_storage_for(name)
  when Ident
    name = node.value.value.to_sym

    if (local_variable = current_iseq.local_variable(name))
      local_variable
    else
      current_iseq.local_table.plain(name)
      current_iseq.local_variable(name)
    end
  end
end

# Compiles a read of a constant, variable, or keyword value.
def visit_var_ref(node)
  case node.value
  when Const
    builder.opt_getconstant_path(constant_names(node))
  when CVar
    name = node.value.value.to_sym
    builder.getclassvariable(name)
  when GVar
    builder.getglobal(node.value.value.to_sym)
  when Ident
    lookup = current_iseq.local_variable(node.value.value.to_sym)

    case lookup.local
    when LocalTable::BlockLocal
      builder.getblockparam(lookup.index, lookup.level)
    when LocalTable::PlainLocal
      builder.getlocal(lookup.index, lookup.level)
    end
  when IVar
    name = node.value.value.to_sym
    builder.getinstancevariable(name)
  when Kw
    case node.value.value
    when "false"
      builder.putobject(false)
    when "nil"
      builder.putnil
    when "self"
      builder.putself
    when "true"
      builder.putobject(true)
    end
  end
end

# Compiles a bare identifier that is a method call with no receiver and no
# arguments.
def visit_vcall(node)
  builder.putself

  flag = VM_CALL_FCALL | VM_CALL_VCALL | VM_CALL_ARGS_SIMPLE
  builder.send(node.value.value.to_sym, 0, flag)
end

# Compiles the body of a `when` clause; the match itself is emitted by the
# enclosing case.
def visit_when(node)
  visit(node.statements)
end

# Compiles a `while` loop: jump to the predicate, run the body, and loop
# back to the body while the predicate is truthy.
def visit_while(node)
  jumps = []

  jumps << builder.jump(-1)
  builder.putnil
  builder.pop
  jumps << builder.jump(-1)

  label = builder.label
  visit(node.statements)
  builder.pop
  # Both forward jumps land on the predicate check.
  jumps.each { |jump| jump[1] = builder.label }

  visit(node.predicate)
  builder.branchif(label)
  builder.putnil if last_statement?
end

# Compiles one element of a %W[] literal, in the same way as a string
# literal.
def visit_word(node)
  if node.parts.length == 1 && node.parts.first.is_a?(TStringContent)
    visit(node.parts.first)
  else
    length = visit_string_parts(node)
    builder.concatstrings(length)
  end
end

# Compiles a %W[] literal. The array is duplicated from a literal only when
# string literals are frozen and every element is static.
def visit_words(node)
  converted = nil

  if frozen_string_literal
    begin
      converted = node.accept(RubyVisitor.new)
    rescue RubyVisitor::CompilationError
    end
  end

  if converted
    builder.duparray(converted)
  else
    visit_all(node.elements)
    builder.newarray(node.elements.length)
  end
end

# Compiles a backtick command string as a call to the ` method on self.
def visit_xstring_literal(node)
  builder.putself
  length = visit_string_parts(node)
  # visit_string_parts may push a leading empty string, so the number of
  # values to join is the count it returns, not the number of parts.
  builder.concatstrings(length) if length > 1
  builder.send(:`, 1, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE)
end

# Compiles `yield` with its arguments.
def visit_yield(node)
  parts = argument_parts(node.arguments)
  visit_all(parts)
  builder.invokeblock(nil, parts.length, VM_CALL_ARGS_SIMPLE)
end

# Compiles a bare `super` that implicitly forwards the current arguments.
def visit_zsuper(_node)
  builder.putself
  builder.invokesuper(
    nil,
    0,
    VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE | VM_CALL_SUPER | VM_CALL_ZSUPER,
    nil
  )
end

private

# This is a helper that is used in places where arguments may be present
# or they may be wrapped in parentheses. It's meant to descend down the
# tree and return an array of argument nodes. Empty parentheses yield an
# empty array.
def argument_parts(node)
  case node
  when nil
    []
  when Args
    node.parts
  when ArgParen
    if node.arguments.is_a?(ArgsForward)
      [node.arguments]
    else
      # Recurse so that a nil argument list (empty parentheses) is handled
      # by the nil branch instead of raising NoMethodError.
      argument_parts(node.arguments)
    end
  when Paren
    node.contents.parts
  end
end

# Constant names when they are being assigned or referenced come in as a
# tree, but it's more convenient to work with them as an array. This
# method converts them into that array. This is nice because it's the
# operand that goes to opt_getconstant_path in Ruby 3.2.
def constant_names(node)
  segments = []
  cursor = node

  # Walk up a scoped constant (A::B::C), prepending each name so that the
  # outermost constant ends up first.
  while [ConstPathField, ConstPathRef].any? { |klass| cursor.is_a?(klass) }
    segments.unshift(cursor.constant.value.to_sym)
    cursor = cursor.parent
  end

  case cursor
  when VarField, VarRef
    segments.unshift(cursor.value.value.to_sym)
  when TopConstRef
    # A leading :: is represented by an empty symbol at the front.
    segments.unshift(:"", cursor.constant.value.to_sym)
  end

  segments
end

# Compiles a ||= assignment for targets that are checked with the defined
# instruction before being read: X, ::X, X::Y, @@x, and $x.
#
# The emitted code checks whether the target is defined, reads it if so,
# and skips the assignment when the value read is truthy. Otherwise the
# right-hand side is evaluated and written back to the target.
def opassign_defined(node)
  target = node.target

  # Push what the defined instruction needs and emit the check.
  case target
  when ConstPathField
    visit(target.parent)
    name = target.constant.value.to_sym

    builder.dup
    builder.defined(DEFINED_CONST_FROM, name, true)
  when TopConstField
    name = target.constant.value.to_sym

    builder.putobject(Object)
    builder.dup
    builder.defined(DEFINED_CONST_FROM, name, true)
  when VarField
    name = target.value.value.to_sym
    builder.putnil

    if target.value.is_a?(Const)
      builder.defined(DEFINED_CONST, name, true)
    elsif target.value.is_a?(CVar)
      builder.defined(DEFINED_CVAR, name, true)
    elsif target.value.is_a?(GVar)
      builder.defined(DEFINED_GVAR, name, true)
    end
  end

  skip_read = builder.branchunless(-1)

  # The target is defined, so read its current value.
  case target
  when ConstPathField, TopConstField
    builder.dup
    builder.putobject(true)
    builder.getconstant(name)
  when VarField
    if target.value.is_a?(Const)
      builder.opt_getconstant_path(constant_names(target))
    elsif target.value.is_a?(CVar)
      builder.getclassvariable(name)
    elsif target.value.is_a?(GVar)
      builder.getglobal(name)
    end
  end

  # A truthy current value means there is nothing to assign.
  builder.dup
  skip_write = builder.branchif(-1)
  builder.pop

  skip_read[1] = builder.label
  visit(node.value)

  # Write the new value back to the target.
  case target
  when ConstPathField, TopConstField
    builder.dupn(2)
    builder.swap
    builder.setconstant(name)
  when VarField
    builder.dup

    if target.value.is_a?(Const)
      builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE)
      builder.setconstant(name)
    elsif target.value.is_a?(CVar)
      builder.setclassvariable(name)
    elsif target.value.is_a?(GVar)
      builder.setglobal(name)
    end
  end

  skip_write[1] = builder.label
end

# Emits the three instructions that follow every value interpolated into a
# string-like structure.
def push_interpolate
  builder.dup
  flags = VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE
  builder.objtostring(:to_s, 0, flags)
  builder.anytostring
end

# Many nodes in the AST act as containers of string parts: string
# literals, regular expressions, heredocs, and so on. This method visits
# every part of such a container and returns how many values it counted.
# When the first part is not plain string content, a leading empty string
# is pushed and counted as well.
def visit_string_parts(node)
  pushed = 0

  if !node.parts.first.is_a?(TStringContent)
    builder.putobject("")
    pushed = 1
  end

  node.parts.each do |part|
    if part.is_a?(StringDVar)
      visit(part.variable)
      push_interpolate
    elsif part.is_a?(StringEmbExpr)
      visit(part)
      push_interpolate
    elsif part.is_a?(TStringContent)
      builder.putobject(part.accept(RubyVisitor.new))
    end

    pushed += 1
  end

  pushed
end

# The current instruction sequence that we're compiling is always stored
# on the compiler. When we descend into a node that has its own
# instruction sequence, this method can be called to temporarily set the
# new value of the instruction sequence, yield, and then set it back.
def with_instruction_sequence(type, name, parent_iseq, node)
  # Remember the state we are replacing so it can be restored afterwards.
  outer_iseq = current_iseq
  outer_builder = builder

  begin
    inner_iseq =
      InstructionSequence.new(type, name, parent_iseq, node.location)
    @current_iseq = inner_iseq

    # The new builder uses the same compilation options as the outer one.
    builder_options = {
      frozen_string_literal: frozen_string_literal,
      operands_unification: operands_unification,
      specialized_instruction: specialized_instruction
    }
    @builder = Builder.new(inner_iseq, **builder_options)

    yield
    inner_iseq
  ensure
    @current_iseq = outer_iseq
    @builder = outer_builder
  end
end

# Runs the given block with the "last statement" flag set, then restores
# the previous value of the flag, even if the block raises. Visit methods
# query this flag because the last statement of a scope can be compiled
# differently (for example leaving instead of popping), which reduces the
# overall number of instructions.
def with_last_statement
  saved = @last_statement
  @last_statement = true
  yield
ensure
  @last_statement = saved
end

# Whether the statement currently being compiled is the last one in its
# scope, as set by with_last_statement.
def last_statement?
  @last_statement
end

# OpAssign nodes can have a number of different kinds of nodes as their
# "target" (i.e., the left-hand side of the assignment). When compiling
# these nodes we typically need to first fetch the current value of the
# variable, then perform some kind of action, then store the result back
# into the variable. This method handles that by first fetching the value,
# then yielding to the block, then storing the result.
+ def with_opassign(node) + case node.target + when ARefField + builder.putnil + visit(node.target.collection) + visit(node.target.index) + + builder.dupn(2) + builder.send(:[], 1, VM_CALL_ARGS_SIMPLE) + + yield + + builder.setn(3) + builder.send(:[]=, 2, VM_CALL_ARGS_SIMPLE) + builder.pop + when ConstPathField + name = node.target.constant.value.to_sym + + visit(node.target.parent) + builder.dup + builder.putobject(true) + builder.getconstant(name) + + yield + + if node.operator.value == "&&=" + builder.dupn(2) + else + builder.swap + builder.topn(1) + end + + builder.swap + builder.setconstant(name) + when TopConstField + name = node.target.constant.value.to_sym + + builder.putobject(Object) + builder.dup + builder.putobject(true) + builder.getconstant(name) + + yield + + if node.operator.value == "&&=" + builder.dupn(2) + else + builder.swap + builder.topn(1) + end + + builder.swap + builder.setconstant(name) + when VarField + case node.target.value + when Const + names = constant_names(node.target) + builder.opt_getconstant_path(names) + + yield + + builder.dup + builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) + builder.setconstant(names.last) + when CVar + name = node.target.value.value.to_sym + builder.getclassvariable(name) + + yield + + builder.dup + builder.setclassvariable(name) + when GVar + name = node.target.value.value.to_sym + builder.getglobal(name) + + yield + + builder.dup + builder.setglobal(name) + when Ident + local_variable = visit(node.target) + builder.getlocal(local_variable.index, local_variable.level) + + yield + + builder.dup + builder.setlocal(local_variable.index, local_variable.level) + when IVar + name = node.target.value.value.to_sym + builder.getinstancevariable(name) + + yield + + builder.dup + builder.setinstancevariable(name) + end + end + end + end +end diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb deleted file mode 100644 index 82155d37..00000000 --- 
a/lib/syntax_tree/visitor/compiler.rb +++ /dev/null @@ -1,2719 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - class Visitor - # This class is an experiment in transforming Syntax Tree nodes into their - # corresponding YARV instruction sequences. It attempts to mirror the - # behavior of RubyVM::InstructionSequence.compile. - # - # You use this as with any other visitor. First you parse code into a tree, - # then you visit it with this compiler. Visiting the root node of the tree - # will return a SyntaxTree::Visitor::Compiler::InstructionSequence object. - # With that object you can call #to_a on it, which will return a serialized - # form of the instruction sequence as an array. This array _should_ mirror - # the array given by RubyVM::InstructionSequence#to_a. - # - # As an example, here is how you would compile a single expression: - # - # program = SyntaxTree.parse("1 + 2") - # program.accept(SyntaxTree::Visitor::Compiler.new).to_a - # - # [ - # "YARVInstructionSequence/SimpleDataFormat", - # 3, - # 1, - # 1, - # {:arg_size=>0, :local_size=>0, :stack_max=>2}, - # "", - # "", - # "", - # 1, - # :top, - # [], - # {}, - # [], - # [ - # [:putobject_INT2FIX_1_], - # [:putobject, 2], - # [:opt_plus, {:mid=>:+, :flag=>16, :orig_argc=>1}], - # [:leave] - # ] - # ] - # - # Note that this is the same output as calling: - # - # RubyVM::InstructionSequence.compile("1 + 2").to_a - # - class Compiler < BasicVisitor - # This visitor is responsible for converting Syntax Tree nodes into their - # corresponding Ruby structures. This is used to convert the operands of - # some instructions like putobject that push a Ruby object directly onto - # the stack. It is only used when the entire structure can be represented - # at compile-time, as opposed to constructed at run-time. - class RubyVisitor < BasicVisitor - # This error is raised whenever a node cannot be converted into a Ruby - # object at compile-time. 
- class CompilationError < StandardError - end - - # This will attempt to compile the given node. If it's possible, then - # it will return the compiled object. Otherwise it will return nil. - def self.compile(node) - node.accept(new) - rescue CompilationError - end - - def visit_array(node) - visit_all(node.contents.parts) - end - - def visit_bare_assoc_hash(node) - node.assocs.to_h do |assoc| - # We can only convert regular key-value pairs. A double splat ** - # operator means it has to be converted at run-time. - raise CompilationError unless assoc.is_a?(Assoc) - [visit(assoc.key), visit(assoc.value)] - end - end - - def visit_float(node) - node.value.to_f - end - - alias visit_hash visit_bare_assoc_hash - - def visit_imaginary(node) - node.value.to_c - end - - def visit_int(node) - node.value.to_i - end - - def visit_label(node) - node.value.chomp(":").to_sym - end - - def visit_mrhs(node) - visit_all(node.parts) - end - - def visit_qsymbols(node) - node.elements.map { |element| visit(element).to_sym } - end - - def visit_qwords(node) - visit_all(node.elements) - end - - def visit_range(node) - left, right = [visit(node.left), visit(node.right)] - node.operator.value === ".." ? left..right : left...right - end - - def visit_rational(node) - node.value.to_r - end - - def visit_regexp_literal(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - Regexp.new(node.parts.first.value, visit_regexp_literal_flags(node)) - else - # Any interpolation of expressions or variables will result in the - # regular expression being constructed at run-time. - raise CompilationError - end - end - - # This isn't actually a visit method, though maybe it should be. It is - # responsible for converting the set of string options on a regular - # expression into its equivalent integer. 
- def visit_regexp_literal_flags(node) - node - .options - .chars - .inject(0) do |accum, option| - accum | - case option - when "i" - Regexp::IGNORECASE - when "x" - Regexp::EXTENDED - when "m" - Regexp::MULTILINE - else - raise "Unknown regexp option: #{option}" - end - end - end - - def visit_symbol_literal(node) - node.value.value.to_sym - end - - def visit_symbols(node) - node.elements.map { |element| visit(element).to_sym } - end - - def visit_tstring_content(node) - node.value - end - - def visit_word(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - node.parts.first.value - else - # Any interpolation of expressions or variables will result in the - # string being constructed at run-time. - raise CompilationError - end - end - - def visit_words(node) - visit_all(node.elements) - end - - def visit_unsupported(_node) - raise CompilationError - end - - # Please forgive the metaprogramming here. This is used to create visit - # methods for every node that we did not explicitly handle. By default - # each of these methods will raise a CompilationError. - handled = instance_methods(false) - (Visitor.instance_methods(false) - handled).each do |method| - alias_method method, :visit_unsupported - end - end - - # This object is used to track the size of the stack at any given time. It - # is effectively a mini symbolic interpreter. It's necessary because when - # instruction sequences get serialized they include a :stack_max field on - # them. This field is used to determine how much stack space to allocate - # for the instruction sequence. - class Stack - attr_reader :current_size, :maximum_size - - def initialize - @current_size = 0 - @maximum_size = 0 - end - - def change_by(value) - @current_size += value - @maximum_size = @current_size if @current_size > @maximum_size - end - end - - # This represents every local variable associated with an instruction - # sequence. 
There are two kinds of locals: plain locals that are what you - # expect, and block proxy locals, which represent local variables - # associated with blocks that were passed into the current instruction - # sequence. - class LocalTable - # A local representing a block passed into the current instruction - # sequence. - class BlockLocal - attr_reader :name - - def initialize(name) - @name = name - end - end - - # A regular local variable. - class PlainLocal - attr_reader :name - - def initialize(name) - @name = name - end - end - - # The result of looking up a local variable in the current local table. - class Lookup - attr_reader :local, :index, :level - - def initialize(local, index, level) - @local = local - @index = index - @level = level - end - end - - attr_reader :locals - - def initialize - @locals = [] - end - - def find(name, level) - index = locals.index { |local| local.name == name } - Lookup.new(locals[index], index, level) if index - end - - def has?(name) - locals.any? { |local| local.name == name } - end - - def names - locals.map(&:name) - end - - def size - locals.length - end - - # Add a BlockLocal to the local table. - def block(name) - locals << BlockLocal.new(name) unless has?(name) - end - - # Add a PlainLocal to the local table. - def plain(name) - locals << PlainLocal.new(name) unless has?(name) - end - - # This is the offset from the top of the stack where this local variable - # lives. - def offset(index) - size - (index - 3) - 1 - end - end - - # This class is meant to mirror RubyVM::InstructionSequence. It contains a - # list of instructions along with the metadata pertaining to them. It also - # functions as a builder for the instruction sequence. - class InstructionSequence - MAGIC = "YARVInstructionSequence/SimpleDataFormat" - - # The type of the instruction sequence. - attr_reader :type - - # The name of the instruction sequence. - attr_reader :name - - # The parent instruction sequence, if there is one. 
- attr_reader :parent_iseq - - # The location of the root node of this instruction sequence. - attr_reader :location - - # This is the list of information about the arguments to this - # instruction sequence. - attr_accessor :argument_size - attr_reader :argument_options - - # The list of instructions for this instruction sequence. - attr_reader :insns - - # The table of local variables. - attr_reader :local_table - - # The hash of names of instance and class variables pointing to the - # index of their associated inline storage. - attr_reader :inline_storages - - # The index of the next inline storage that will be created. - attr_reader :storage_index - - # An object that will track the current size of the stack and the - # maximum size of the stack for this instruction sequence. - attr_reader :stack - - def initialize(type, name, parent_iseq, location) - @type = type - @name = name - @parent_iseq = parent_iseq - @location = location - - @argument_size = 0 - @argument_options = {} - - @local_table = LocalTable.new - @inline_storages = {} - @insns = [] - @storage_index = 0 - @stack = Stack.new - end - - def local_variable(name, level = 0) - if (lookup = local_table.find(name, level)) - lookup - elsif parent_iseq - parent_iseq.local_variable(name, level + 1) - end - end - - def push(insn) - insns << insn - insn - end - - def inline_storage - storage = storage_index - @storage_index += 1 - storage - end - - def inline_storage_for(name) - unless inline_storages.key?(name) - inline_storages[name] = inline_storage - end - - inline_storages[name] - end - - def length - insns.inject(0) do |sum, insn| - insn.is_a?(Array) ? 
sum + insn.length : sum - end - end - - def each_child - insns.each do |insn| - insn[1..].each do |operand| - yield operand if operand.is_a?(InstructionSequence) - end - end - end - - def to_a - versions = RUBY_VERSION.split(".").map(&:to_i) - - [ - MAGIC, - versions[0], - versions[1], - 1, - { - arg_size: argument_size, - local_size: local_table.size, - stack_max: stack.maximum_size - }, - name, - "", - "", - location.start_line, - type, - local_table.names, - argument_options, - [], - insns.map { |insn| serialize(insn) } - ] - end - - private - - def serialize(insn) - case insn[0] - when :checkkeyword, :getblockparam, :getblockparamproxy, - :getlocal_WC_0, :getlocal_WC_1, :getlocal, :setlocal_WC_0, - :setlocal_WC_1, :setlocal - iseq = self - - case insn[0] - when :getlocal_WC_1, :setlocal_WC_1 - iseq = iseq.parent_iseq - when :getblockparam, :getblockparamproxy, :getlocal, :setlocal - insn[2].times { iseq = iseq.parent_iseq } - end - - # Here we need to map the local variable index to the offset - # from the top of the stack where it will be stored. - [insn[0], iseq.local_table.offset(insn[1]), *insn[2..]] - when :defineclass - [insn[0], insn[1], insn[2].to_a, insn[3]] - when :definemethod, :definesmethod - [insn[0], insn[1], insn[2].to_a] - when :send - # For any instructions that push instruction sequences onto the - # stack, we need to call #to_a on them as well. - [insn[0], insn[1], (insn[2].to_a if insn[2])] - when :once - [insn[0], insn[1].to_a, insn[2]] - else - insn - end - end - end - - # This class serves as a layer of indirection between the instruction - # sequence and the compiler. It allows us to provide different behavior - # for certain instructions depending on the Ruby version. For example, - # class variable reads and writes gained an inline cache in Ruby 3.0. So - # we place the logic for checking the Ruby version in this class. 
- class Builder - attr_reader :iseq, :stack - attr_reader :frozen_string_literal, - :operands_unification, - :specialized_instruction - - def initialize( - iseq, - frozen_string_literal: false, - operands_unification: true, - specialized_instruction: true - ) - @iseq = iseq - @stack = iseq.stack - - @frozen_string_literal = frozen_string_literal - @operands_unification = operands_unification - @specialized_instruction = specialized_instruction - end - - # This creates a new label at the current length of the instruction - # sequence. It is used as the operand for jump instructions. - def label - name = :"label_#{iseq.length}" - iseq.insns.last == name ? name : event(name) - end - - def event(name) - iseq.push(name) - name - end - - def adjuststack(number) - stack.change_by(-number) - iseq.push([:adjuststack, number]) - end - - def anytostring - stack.change_by(-2 + 1) - iseq.push([:anytostring]) - end - - def branchif(index) - stack.change_by(-1) - iseq.push([:branchif, index]) - end - - def branchnil(index) - stack.change_by(-1) - iseq.push([:branchnil, index]) - end - - def branchunless(index) - stack.change_by(-1) - iseq.push([:branchunless, index]) - end - - def checkkeyword(index, keyword_index) - stack.change_by(+1) - iseq.push([:checkkeyword, index, keyword_index]) - end - - def concatarray - stack.change_by(-2 + 1) - iseq.push([:concatarray]) - end - - def concatstrings(number) - stack.change_by(-number + 1) - iseq.push([:concatstrings, number]) - end - - def defined(type, name, message) - stack.change_by(-1 + 1) - iseq.push([:defined, type, name, message]) - end - - def defineclass(name, class_iseq, flags) - stack.change_by(-2 + 1) - iseq.push([:defineclass, name, class_iseq, flags]) - end - - def definemethod(name, method_iseq) - stack.change_by(0) - iseq.push([:definemethod, name, method_iseq]) - end - - def definesmethod(name, method_iseq) - stack.change_by(-1) - iseq.push([:definesmethod, name, method_iseq]) - end - - def dup - stack.change_by(-1 + 2) 
- iseq.push([:dup]) - end - - def duparray(object) - stack.change_by(+1) - iseq.push([:duparray, object]) - end - - def duphash(object) - stack.change_by(+1) - iseq.push([:duphash, object]) - end - - def dupn(number) - stack.change_by(+number) - iseq.push([:dupn, number]) - end - - def expandarray(length, flag) - stack.change_by(-1 + length) - iseq.push([:expandarray, length, flag]) - end - - def getblockparam(index, level) - stack.change_by(+1) - iseq.push([:getblockparam, index, level]) - end - - def getblockparamproxy(index, level) - stack.change_by(+1) - iseq.push([:getblockparamproxy, index, level]) - end - - def getclassvariable(name) - stack.change_by(+1) - - if RUBY_VERSION >= "3.0" - iseq.push([:getclassvariable, name, iseq.inline_storage_for(name)]) - else - iseq.push([:getclassvariable, name]) - end - end - - def getconstant(name) - stack.change_by(-2 + 1) - iseq.push([:getconstant, name]) - end - - def getglobal(name) - stack.change_by(+1) - iseq.push([:getglobal, name]) - end - - def getinstancevariable(name) - stack.change_by(+1) - - if RUBY_VERSION >= "3.2" - iseq.push([:getinstancevariable, name, iseq.inline_storage]) - else - inline_storage = iseq.inline_storage_for(name) - iseq.push([:getinstancevariable, name, inline_storage]) - end - end - - def getlocal(index, level) - stack.change_by(+1) - - if operands_unification - # Specialize the getlocal instruction based on the level of the - # local variable. If it's 0 or 1, then there's a specialized - # instruction that will look at the current scope or the parent - # scope, respectively, and requires fewer operands. 
- case level - when 0 - iseq.push([:getlocal_WC_0, index]) - when 1 - iseq.push([:getlocal_WC_1, index]) - else - iseq.push([:getlocal, index, level]) - end - else - iseq.push([:getlocal, index, level]) - end - end - - def getspecial(key, type) - stack.change_by(-0 + 1) - iseq.push([:getspecial, key, type]) - end - - def intern - stack.change_by(-1 + 1) - iseq.push([:intern]) - end - - def invokeblock(method_id, argc, flag) - stack.change_by(-argc + 1) - iseq.push([:invokeblock, call_data(method_id, argc, flag)]) - end - - def invokesuper(method_id, argc, flag, block_iseq) - stack.change_by(-(argc + 1) + 1) - - cdata = call_data(method_id, argc, flag) - iseq.push([:invokesuper, cdata, block_iseq]) - end - - def jump(index) - stack.change_by(0) - iseq.push([:jump, index]) - end - - def leave - stack.change_by(-1) - iseq.push([:leave]) - end - - def newarray(length) - stack.change_by(-length + 1) - iseq.push([:newarray, length]) - end - - def newhash(length) - stack.change_by(-length + 1) - iseq.push([:newhash, length]) - end - - def newrange(flag) - stack.change_by(-2 + 1) - iseq.push([:newrange, flag]) - end - - def nop - stack.change_by(0) - iseq.push([:nop]) - end - - def objtostring(method_id, argc, flag) - stack.change_by(-1 + 1) - iseq.push([:objtostring, call_data(method_id, argc, flag)]) - end - - def once(postexe_iseq, inline_storage) - stack.change_by(+1) - iseq.push([:once, postexe_iseq, inline_storage]) - end - - def opt_getconstant_path(names) - if RUBY_VERSION >= "3.2" - stack.change_by(+1) - iseq.push([:opt_getconstant_path, names]) - else - inline_storage = iseq.inline_storage - getinlinecache = opt_getinlinecache(-1, inline_storage) - - if names[0] == :"" - names.shift - pop - putobject(Object) - end - - names.each_with_index do |name, index| - putobject(index == 0) - getconstant(name) - end - - opt_setinlinecache(inline_storage) - getinlinecache[1] = label - end - end - - def opt_getinlinecache(offset, inline_storage) - stack.change_by(+1) - 
iseq.push([:opt_getinlinecache, offset, inline_storage]) - end - - def opt_newarray_max(length) - if specialized_instruction - stack.change_by(-length + 1) - iseq.push([:opt_newarray_max, length]) - else - newarray(length) - send(:max, 0, VM_CALL_ARGS_SIMPLE) - end - end - - def opt_newarray_min(length) - if specialized_instruction - stack.change_by(-length + 1) - iseq.push([:opt_newarray_min, length]) - else - newarray(length) - send(:min, 0, VM_CALL_ARGS_SIMPLE) - end - end - - def opt_setinlinecache(inline_storage) - stack.change_by(-1 + 1) - iseq.push([:opt_setinlinecache, inline_storage]) - end - - def opt_str_freeze(value) - if specialized_instruction - stack.change_by(+1) - iseq.push( - [ - :opt_str_freeze, - value, - call_data(:freeze, 0, VM_CALL_ARGS_SIMPLE) - ] - ) - else - putstring(value) - send(:freeze, 0, VM_CALL_ARGS_SIMPLE) - end - end - - def opt_str_uminus(value) - if specialized_instruction - stack.change_by(+1) - iseq.push( - [:opt_str_uminus, value, call_data(:-@, 0, VM_CALL_ARGS_SIMPLE)] - ) - else - putstring(value) - send(:-@, 0, VM_CALL_ARGS_SIMPLE) - end - end - - def pop - stack.change_by(-1) - iseq.push([:pop]) - end - - def putnil - stack.change_by(+1) - iseq.push([:putnil]) - end - - def putobject(object) - stack.change_by(+1) - - if operands_unification - # Specialize the putobject instruction based on the value of the - # object. If it's 0 or 1, then there's a specialized instruction - # that will push the object onto the stack and requires fewer - # operands. 
- if object.eql?(0) - iseq.push([:putobject_INT2FIX_0_]) - elsif object.eql?(1) - iseq.push([:putobject_INT2FIX_1_]) - else - iseq.push([:putobject, object]) - end - else - iseq.push([:putobject, object]) - end - end - - def putself - stack.change_by(+1) - iseq.push([:putself]) - end - - def putspecialobject(object) - stack.change_by(+1) - iseq.push([:putspecialobject, object]) - end - - def putstring(object) - stack.change_by(+1) - iseq.push([:putstring, object]) - end - - def send(method_id, argc, flag, block_iseq = nil) - stack.change_by(-(argc + 1) + 1) - cdata = call_data(method_id, argc, flag) - - if specialized_instruction - # Specialize the send instruction. If it doesn't have a block - # attached, then we will replace it with an opt_send_without_block - # and do further specializations based on the called method and the - # number of arguments. - - # stree-ignore - if !block_iseq && (flag & VM_CALL_ARGS_BLOCKARG) == 0 - case [method_id, argc] - when [:length, 0] then iseq.push([:opt_length, cdata]) - when [:size, 0] then iseq.push([:opt_size, cdata]) - when [:empty?, 0] then iseq.push([:opt_empty_p, cdata]) - when [:nil?, 0] then iseq.push([:opt_nil_p, cdata]) - when [:succ, 0] then iseq.push([:opt_succ, cdata]) - when [:!, 0] then iseq.push([:opt_not, cdata]) - when [:+, 1] then iseq.push([:opt_plus, cdata]) - when [:-, 1] then iseq.push([:opt_minus, cdata]) - when [:*, 1] then iseq.push([:opt_mult, cdata]) - when [:/, 1] then iseq.push([:opt_div, cdata]) - when [:%, 1] then iseq.push([:opt_mod, cdata]) - when [:==, 1] then iseq.push([:opt_eq, cdata]) - when [:=~, 1] then iseq.push([:opt_regexpmatch2, cdata]) - when [:<, 1] then iseq.push([:opt_lt, cdata]) - when [:<=, 1] then iseq.push([:opt_le, cdata]) - when [:>, 1] then iseq.push([:opt_gt, cdata]) - when [:>=, 1] then iseq.push([:opt_ge, cdata]) - when [:<<, 1] then iseq.push([:opt_ltlt, cdata]) - when [:[], 1] then iseq.push([:opt_aref, cdata]) - when [:&, 1] then iseq.push([:opt_and, cdata]) - when 
[:|, 1] then iseq.push([:opt_or, cdata]) - when [:[]=, 2] then iseq.push([:opt_aset, cdata]) - when [:!=, 1] - eql_data = call_data(:==, 1, VM_CALL_ARGS_SIMPLE) - iseq.push([:opt_neq, eql_data, cdata]) - else - iseq.push([:opt_send_without_block, cdata]) - end - else - iseq.push([:send, cdata, block_iseq]) - end - else - iseq.push([:send, cdata, block_iseq]) - end - end - - def setclassvariable(name) - stack.change_by(-1) - - if RUBY_VERSION >= "3.0" - iseq.push([:setclassvariable, name, iseq.inline_storage_for(name)]) - else - iseq.push([:setclassvariable, name]) - end - end - - def setconstant(name) - stack.change_by(-2) - iseq.push([:setconstant, name]) - end - - def setglobal(name) - stack.change_by(-1) - iseq.push([:setglobal, name]) - end - - def setinstancevariable(name) - stack.change_by(-1) - - if RUBY_VERSION >= "3.2" - iseq.push([:setinstancevariable, name, iseq.inline_storage]) - else - inline_storage = iseq.inline_storage_for(name) - iseq.push([:setinstancevariable, name, inline_storage]) - end - end - - def setlocal(index, level) - stack.change_by(-1) - - if operands_unification - # Specialize the setlocal instruction based on the level of the - # local variable. If it's 0 or 1, then there's a specialized - # instruction that will write to the current scope or the parent - # scope, respectively, and requires fewer operands. 
- case level - when 0 - iseq.push([:setlocal_WC_0, index]) - when 1 - iseq.push([:setlocal_WC_1, index]) - else - iseq.push([:setlocal, index, level]) - end - else - iseq.push([:setlocal, index, level]) - end - end - - def setn(number) - stack.change_by(-1 + 1) - iseq.push([:setn, number]) - end - - def splatarray(flag) - stack.change_by(-1 + 1) - iseq.push([:splatarray, flag]) - end - - def swap - stack.change_by(-2 + 2) - iseq.push([:swap]) - end - - def topn(number) - stack.change_by(+1) - iseq.push([:topn, number]) - end - - def toregexp(options, length) - stack.change_by(-length + 1) - iseq.push([:toregexp, options, length]) - end - - private - - # This creates a call data object that is used as the operand for the - # send, invokesuper, and objtostring instructions. - def call_data(method_id, argc, flag) - { mid: method_id, flag: flag, orig_argc: argc } - end - end - - # These constants correspond to the putspecialobject instruction. They are - # used to represent special objects that are pushed onto the stack. - VM_SPECIAL_OBJECT_VMCORE = 1 - VM_SPECIAL_OBJECT_CBASE = 2 - VM_SPECIAL_OBJECT_CONST_BASE = 3 - - # These constants correspond to the flag passed as part of the call data - # structure on the send instruction. They are used to represent various - # metadata about the callsite (e.g., were keyword arguments used?, was a - # block given?, etc.). - VM_CALL_ARGS_SPLAT = 1 << 0 - VM_CALL_ARGS_BLOCKARG = 1 << 1 - VM_CALL_FCALL = 1 << 2 - VM_CALL_VCALL = 1 << 3 - VM_CALL_ARGS_SIMPLE = 1 << 4 - VM_CALL_BLOCKISEQ = 1 << 5 - VM_CALL_KWARG = 1 << 6 - VM_CALL_KW_SPLAT = 1 << 7 - VM_CALL_TAILCALL = 1 << 8 - VM_CALL_SUPER = 1 << 9 - VM_CALL_ZSUPER = 1 << 10 - VM_CALL_OPT_SEND = 1 << 11 - VM_CALL_KW_SPLAT_MUT = 1 << 12 - - # These constants correspond to the value passed as part of the defined - # instruction. It's an enum defined in the CRuby codebase that tells that - # instruction what kind of defined check to perform. 
- DEFINED_NIL = 1 - DEFINED_IVAR = 2 - DEFINED_LVAR = 3 - DEFINED_GVAR = 4 - DEFINED_CVAR = 5 - DEFINED_CONST = 6 - DEFINED_METHOD = 7 - DEFINED_YIELD = 8 - DEFINED_ZSUPER = 9 - DEFINED_SELF = 10 - DEFINED_TRUE = 11 - DEFINED_FALSE = 12 - DEFINED_ASGN = 13 - DEFINED_EXPR = 14 - DEFINED_REF = 15 - DEFINED_FUNC = 16 - DEFINED_CONST_FROM = 17 - - # These constants correspond to the value passed in the flags as part of - # the defineclass instruction. - VM_DEFINECLASS_TYPE_CLASS = 0 - VM_DEFINECLASS_TYPE_SINGLETON_CLASS = 1 - VM_DEFINECLASS_TYPE_MODULE = 2 - VM_DEFINECLASS_FLAG_SCOPED = 8 - VM_DEFINECLASS_FLAG_HAS_SUPERCLASS = 16 - - # These options mirror the compilation options that we currently support - # that can be also passed to RubyVM::InstructionSequence.compile. - attr_reader :frozen_string_literal, - :operands_unification, - :specialized_instruction - - # The current instruction sequence that is being compiled. - attr_reader :current_iseq - - # This is the current builder that is being used to construct the current - # instruction sequence. - attr_reader :builder - - # A boolean to track if we're currently compiling the last statement - # within a set of statements. This information is necessary to determine - # if we need to return the value of the last statement. 
- attr_reader :last_statement - - def initialize( - frozen_string_literal: false, - operands_unification: true, - specialized_instruction: true - ) - @frozen_string_literal = frozen_string_literal - @operands_unification = operands_unification - @specialized_instruction = specialized_instruction - - @current_iseq = nil - @builder = nil - @last_statement = false - end - - def visit_BEGIN(node) - visit(node.statements) - end - - def visit_CHAR(node) - if frozen_string_literal - builder.putobject(node.value[1..]) - else - builder.putstring(node.value[1..]) - end - end - - def visit_END(node) - name = "block in #{current_iseq.name}" - once_iseq = - with_instruction_sequence(:block, name, current_iseq, node) do - postexe_iseq = - with_instruction_sequence(:block, name, current_iseq, node) do - *statements, last_statement = node.statements.body - visit_all(statements) - with_last_statement { visit(last_statement) } - builder.leave - end - - builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) - builder.send(:"core#set_postexe", 0, VM_CALL_FCALL, postexe_iseq) - builder.leave - end - - builder.once(once_iseq, current_iseq.inline_storage) - builder.pop - end - - def visit_alias(node) - builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) - builder.putspecialobject(VM_SPECIAL_OBJECT_CBASE) - visit(node.left) - visit(node.right) - builder.send(:"core#set_method_alias", 3, VM_CALL_ARGS_SIMPLE) - end - - def visit_aref(node) - visit(node.collection) - visit(node.index) - builder.send(:[], 1, VM_CALL_ARGS_SIMPLE) - end - - def visit_arg_block(node) - visit(node.value) - end - - def visit_arg_paren(node) - visit(node.arguments) - end - - def visit_arg_star(node) - visit(node.value) - builder.splatarray(false) - end - - def visit_args(node) - visit_all(node.parts) - end - - def visit_array(node) - if (compiled = RubyVisitor.compile(node)) - builder.duparray(compiled) - else - length = 0 - - node.contents.parts.each do |part| - if part.is_a?(ArgStar) - if length > 0 - 
builder.newarray(length) - length = 0 - end - - visit(part.value) - builder.concatarray - else - visit(part) - length += 1 - end - end - - builder.newarray(length) if length > 0 - if length > 0 && length != node.contents.parts.length - builder.concatarray - end - end - end - - def visit_assign(node) - case node.target - when ARefField - builder.putnil - visit(node.target.collection) - visit(node.target.index) - visit(node.value) - builder.setn(3) - builder.send(:[]=, 2, VM_CALL_ARGS_SIMPLE) - builder.pop - when ConstPathField - names = constant_names(node.target) - name = names.pop - - if RUBY_VERSION >= "3.2" - builder.opt_getconstant_path(names) - visit(node.value) - builder.swap - builder.topn(1) - builder.swap - builder.setconstant(name) - else - visit(node.value) - builder.dup if last_statement? - builder.opt_getconstant_path(names) - builder.setconstant(name) - end - when Field - builder.putnil - visit(node.target) - visit(node.value) - builder.setn(2) - builder.send(:"#{node.target.name.value}=", 1, VM_CALL_ARGS_SIMPLE) - builder.pop - when TopConstField - name = node.target.constant.value.to_sym - - if RUBY_VERSION >= "3.2" - builder.putobject(Object) - visit(node.value) - builder.swap - builder.topn(1) - builder.swap - builder.setconstant(name) - else - visit(node.value) - builder.dup if last_statement? - builder.putobject(Object) - builder.setconstant(name) - end - when VarField - visit(node.value) - builder.dup if last_statement? 
- - case node.target.value - when Const - builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) - builder.setconstant(node.target.value.value.to_sym) - when CVar - builder.setclassvariable(node.target.value.value.to_sym) - when GVar - builder.setglobal(node.target.value.value.to_sym) - when Ident - local_variable = visit(node.target) - builder.setlocal(local_variable.index, local_variable.level) - when IVar - builder.setinstancevariable(node.target.value.value.to_sym) - end - end - end - - def visit_assoc(node) - visit(node.key) - visit(node.value) - end - - def visit_assoc_splat(node) - visit(node.value) - end - - def visit_backref(node) - builder.getspecial(1, 2 * node.value[1..].to_i) - end - - def visit_bare_assoc_hash(node) - if (compiled = RubyVisitor.compile(node)) - builder.duphash(compiled) - else - visit_all(node.assocs) - end - end - - def visit_binary(node) - case node.operator - when :"&&" - visit(node.left) - builder.dup - - branchunless = builder.branchunless(-1) - builder.pop - - visit(node.right) - branchunless[1] = builder.label - when :"||" - visit(node.left) - builder.dup - - branchif = builder.branchif(-1) - builder.pop - - visit(node.right) - branchif[1] = builder.label - else - visit(node.left) - visit(node.right) - builder.send(node.operator, 1, VM_CALL_ARGS_SIMPLE) - end - end - - def visit_block(node) - with_instruction_sequence( - :block, - "block in #{current_iseq.name}", - current_iseq, - node - ) do - builder.event(:RUBY_EVENT_B_CALL) - visit(node.block_var) - visit(node.bodystmt) - builder.event(:RUBY_EVENT_B_RETURN) - builder.leave - end - end - - def visit_block_var(node) - params = node.params - - if params.requireds.length == 1 && params.optionals.empty? && - !params.rest && params.posts.empty? && params.keywords.empty? 
&& - !params.keyword_rest && !params.block - current_iseq.argument_options[:ambiguous_param0] = true - end - - visit(node.params) - - node.locals.each do |local| - current_iseq.local_table.plain(local.value.to_sym) - end - end - - def visit_blockarg(node) - current_iseq.argument_options[:block_start] = current_iseq.argument_size - current_iseq.local_table.block(node.name.value.to_sym) - current_iseq.argument_size += 1 - end - - def visit_bodystmt(node) - visit(node.statements) - end - - def visit_call(node) - if node.is_a?(CallNode) - return( - visit_call( - CommandCall.new( - receiver: node.receiver, - operator: node.operator, - message: node.message, - arguments: node.arguments, - block: nil, - location: node.location - ) - ) - ) - end - - arg_parts = argument_parts(node.arguments) - argc = arg_parts.length - - # First we're going to check if we're calling a method on an array - # literal without any arguments. In that case there are some - # specializations we might be able to perform. - if argc == 0 && (node.message.is_a?(Ident) || node.message.is_a?(Op)) - case node.receiver - when ArrayLiteral - parts = node.receiver.contents&.parts || [] - - if parts.none? { |part| part.is_a?(ArgStar) } && - RubyVisitor.compile(node.receiver).nil? - case node.message.value - when "max" - visit(node.receiver.contents) - builder.opt_newarray_max(parts.length) - return - when "min" - visit(node.receiver.contents) - builder.opt_newarray_min(parts.length) - return - end - end - when StringLiteral - if RubyVisitor.compile(node.receiver).nil? 
- case node.message.value - when "-@" - builder.opt_str_uminus(node.receiver.parts.first.value) - return - when "freeze" - builder.opt_str_freeze(node.receiver.parts.first.value) - return - end - end - end - end - - if node.receiver - if node.receiver.is_a?(VarRef) && - ( - lookup = - current_iseq.local_variable(node.receiver.value.value.to_sym) - ) && lookup.local.is_a?(LocalTable::BlockLocal) - builder.getblockparamproxy(lookup.index, lookup.level) - else - visit(node.receiver) - end - else - builder.putself - end - - branchnil = - if node.operator&.value == "&." - builder.dup - builder.branchnil(-1) - end - - flag = 0 - - arg_parts.each do |arg_part| - case arg_part - when ArgBlock - argc -= 1 - flag |= VM_CALL_ARGS_BLOCKARG - visit(arg_part) - when ArgStar - flag |= VM_CALL_ARGS_SPLAT - visit(arg_part) - when ArgsForward - flag |= VM_CALL_ARGS_SPLAT | VM_CALL_ARGS_BLOCKARG - - lookup = current_iseq.local_table.find(:*, 0) - builder.getlocal(lookup.index, lookup.level) - builder.splatarray(arg_parts.length != 1) - - lookup = current_iseq.local_table.find(:&, 0) - builder.getblockparamproxy(lookup.index, lookup.level) - when BareAssocHash - flag |= VM_CALL_KW_SPLAT - visit(arg_part) - else - visit(arg_part) - end - end - - block_iseq = visit(node.block) if node.block - flag |= VM_CALL_ARGS_SIMPLE if block_iseq.nil? && flag == 0 - flag |= VM_CALL_FCALL if node.receiver.nil? 
- - builder.send(node.message.value.to_sym, argc, flag, block_iseq) - branchnil[1] = builder.label if branchnil - end - - def visit_case(node) - visit(node.value) if node.value - - clauses = [] - else_clause = nil - - current = node.consequent - - while current - clauses << current - - if (current = current.consequent).is_a?(Else) - else_clause = current - break - end - end - - branches = - clauses.map do |clause| - visit(clause.arguments) - builder.topn(1) - builder.send(:===, 1, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE) - [clause, builder.branchif(:label_00)] - end - - builder.pop - - else_clause ? visit(else_clause) : builder.putnil - - builder.leave - - branches.each_with_index do |(clause, branchif), index| - builder.leave if index != 0 - branchif[1] = builder.label - builder.pop - visit(clause) - end - end - - def visit_class(node) - name = node.constant.constant.value.to_sym - class_iseq = - with_instruction_sequence( - :class, - "", - current_iseq, - node - ) do - builder.event(:RUBY_EVENT_CLASS) - visit(node.bodystmt) - builder.event(:RUBY_EVENT_END) - builder.leave - end - - flags = VM_DEFINECLASS_TYPE_CLASS - - case node.constant - when ConstPathRef - flags |= VM_DEFINECLASS_FLAG_SCOPED - visit(node.constant.parent) - when ConstRef - builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) - when TopConstRef - flags |= VM_DEFINECLASS_FLAG_SCOPED - builder.putobject(Object) - end - - if node.superclass - flags |= VM_DEFINECLASS_FLAG_HAS_SUPERCLASS - visit(node.superclass) - else - builder.putnil - end - - builder.defineclass(name, class_iseq, flags) - end - - def visit_command(node) - visit_call( - CommandCall.new( - receiver: nil, - operator: nil, - message: node.message, - arguments: node.arguments, - block: node.block, - location: node.location - ) - ) - end - - def visit_command_call(node) - visit_call( - CommandCall.new( - receiver: node.receiver, - operator: node.operator, - message: node.message, - arguments: node.arguments, - block: node.block, - 
location: node.location - ) - ) - end - - def visit_const_path_field(node) - visit(node.parent) - end - - def visit_const_path_ref(node) - names = constant_names(node) - builder.opt_getconstant_path(names) - end - - def visit_def(node) - method_iseq = - with_instruction_sequence( - :method, - node.name.value, - current_iseq, - node - ) do - visit(node.params) if node.params - builder.event(:RUBY_EVENT_CALL) - visit(node.bodystmt) - builder.event(:RUBY_EVENT_RETURN) - builder.leave - end - - name = node.name.value.to_sym - - if node.target - visit(node.target) - builder.definesmethod(name, method_iseq) - else - builder.definemethod(name, method_iseq) - end - - builder.putobject(name) - end - - def visit_defined(node) - case node.value - when Assign - # If we're assigning to a local variable, then we need to make sure - # that we put it into the local table. - if node.value.target.is_a?(VarField) && - node.value.target.value.is_a?(Ident) - current_iseq.local_table.plain(node.value.target.value.value.to_sym) - end - - builder.putobject("assignment") - when VarRef - value = node.value.value - name = value.value.to_sym - - case value - when Const - builder.putnil - builder.defined(DEFINED_CONST, name, "constant") - when CVar - builder.putnil - builder.defined(DEFINED_CVAR, name, "class variable") - when GVar - builder.putnil - builder.defined(DEFINED_GVAR, name, "global-variable") - when Ident - builder.putobject("local-variable") - when IVar - builder.putnil - builder.defined(DEFINED_IVAR, name, "instance-variable") - when Kw - case name - when :false - builder.putobject("false") - when :nil - builder.putobject("nil") - when :self - builder.putobject("self") - when :true - builder.putobject("true") - end - end - when VCall - builder.putself - - name = node.value.value.value.to_sym - builder.defined(DEFINED_FUNC, name, "method") - when YieldNode - builder.putnil - builder.defined(DEFINED_YIELD, false, "yield") - when ZSuper - builder.putnil - 
builder.defined(DEFINED_ZSUPER, false, "super") - else - builder.putobject("expression") - end - end - - def visit_dyna_symbol(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - builder.putobject(node.parts.first.value.to_sym) - end - end - - def visit_else(node) - visit(node.statements) - builder.pop unless last_statement? - end - - def visit_elsif(node) - visit_if( - IfNode.new( - predicate: node.predicate, - statements: node.statements, - consequent: node.consequent, - location: node.location - ) - ) - end - - def visit_field(node) - visit(node.parent) - end - - def visit_float(node) - builder.putobject(node.accept(RubyVisitor.new)) - end - - def visit_for(node) - visit(node.collection) - - name = node.index.value.value.to_sym - current_iseq.local_table.plain(name) - - block_iseq = - with_instruction_sequence( - :block, - "block in #{current_iseq.name}", - current_iseq, - node.statements - ) do - current_iseq.argument_options[:lead_num] ||= 0 - current_iseq.argument_options[:lead_num] += 1 - current_iseq.argument_options[:ambiguous_param0] = true - - current_iseq.argument_size += 1 - current_iseq.local_table.plain(2) - - builder.getlocal(0, 0) - - local_variable = current_iseq.local_variable(name) - builder.setlocal(local_variable.index, local_variable.level) - - builder.event(:RUBY_EVENT_B_CALL) - builder.nop - - visit(node.statements) - builder.event(:RUBY_EVENT_B_RETURN) - builder.leave - end - - builder.send(:each, 0, 0, block_iseq) - end - - def visit_hash(node) - builder.duphash(node.accept(RubyVisitor.new)) - rescue RubyVisitor::CompilationError - visit_all(node.assocs) - builder.newhash(node.assocs.length * 2) - end - - def visit_heredoc(node) - if node.beginning.value.end_with?("`") - visit_xstring_literal(node) - elsif node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - visit(node.parts.first) - else - length = visit_string_parts(node) - builder.concatstrings(length) - end - end - - def visit_if(node) - 
visit(node.predicate) - branchunless = builder.branchunless(-1) - visit(node.statements) - - if last_statement? - builder.leave - branchunless[1] = builder.label - - node.consequent ? visit(node.consequent) : builder.putnil - else - builder.pop - - if node.consequent - jump = builder.jump(-1) - branchunless[1] = builder.label - visit(node.consequent) - jump[1] = builder.label - else - branchunless[1] = builder.label - end - end - end - - def visit_if_op(node) - visit_if( - IfNode.new( - predicate: node.predicate, - statements: node.truthy, - consequent: - Else.new( - keyword: Kw.new(value: "else", location: Location.default), - statements: node.falsy, - location: Location.default - ), - location: Location.default - ) - ) - end - - def visit_imaginary(node) - builder.putobject(node.accept(RubyVisitor.new)) - end - - def visit_int(node) - builder.putobject(node.accept(RubyVisitor.new)) - end - - def visit_kwrest_param(node) - current_iseq.argument_options[:kwrest] = current_iseq.argument_size - current_iseq.argument_size += 1 - current_iseq.local_table.plain(node.name.value.to_sym) - end - - def visit_label(node) - builder.putobject(node.accept(RubyVisitor.new)) - end - - def visit_lambda(node) - lambda_iseq = - with_instruction_sequence( - :block, - "block in #{current_iseq.name}", - current_iseq, - node - ) do - builder.event(:RUBY_EVENT_B_CALL) - visit(node.params) - visit(node.statements) - builder.event(:RUBY_EVENT_B_RETURN) - builder.leave - end - - builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) - builder.send(:lambda, 0, VM_CALL_FCALL, lambda_iseq) - end - - def visit_lambda_var(node) - visit_block_var(node) - end - - def visit_massign(node) - visit(node.value) - builder.dup - visit(node.target) - end - - def visit_method_add_block(node) - visit_call( - CommandCall.new( - receiver: node.call.receiver, - operator: node.call.operator, - message: node.call.message, - arguments: node.call.arguments, - block: node.block, - location: node.location - ) - ) - end 
- - def visit_mlhs(node) - lookups = [] - - node.parts.each do |part| - case part - when VarField - lookups << visit(part) - end - end - - builder.expandarray(lookups.length, 0) - - lookups.each { |lookup| builder.setlocal(lookup.index, lookup.level) } - end - - def visit_module(node) - name = node.constant.constant.value.to_sym - module_iseq = - with_instruction_sequence( - :class, - "", - current_iseq, - node - ) do - builder.event(:RUBY_EVENT_CLASS) - visit(node.bodystmt) - builder.event(:RUBY_EVENT_END) - builder.leave - end - - flags = VM_DEFINECLASS_TYPE_MODULE - - case node.constant - when ConstPathRef - flags |= VM_DEFINECLASS_FLAG_SCOPED - visit(node.constant.parent) - when ConstRef - builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) - when TopConstRef - flags |= VM_DEFINECLASS_FLAG_SCOPED - builder.putobject(Object) - end - - builder.putnil - builder.defineclass(name, module_iseq, flags) - end - - def visit_mrhs(node) - if (compiled = RubyVisitor.compile(node)) - builder.duparray(compiled) - else - visit_all(node.parts) - builder.newarray(node.parts.length) - end - end - - def visit_not(node) - visit(node.statement) - builder.send(:!, 0, VM_CALL_ARGS_SIMPLE) - end - - def visit_opassign(node) - flag = VM_CALL_ARGS_SIMPLE - if node.target.is_a?(ConstPathField) || node.target.is_a?(TopConstField) - flag |= VM_CALL_FCALL - end - - case (operator = node.operator.value.chomp("=").to_sym) - when :"&&" - branchunless = nil - - with_opassign(node) do - builder.dup - branchunless = builder.branchunless(-1) - builder.pop - visit(node.value) - end - - case node.target - when ARefField - builder.leave - branchunless[1] = builder.label - builder.setn(3) - builder.adjuststack(3) - when ConstPathField, TopConstField - branchunless[1] = builder.label - builder.swap - builder.pop - else - branchunless[1] = builder.label - end - when :"||" - if node.target.is_a?(ConstPathField) || - node.target.is_a?(TopConstField) - opassign_defined(node) - builder.swap - builder.pop 
- elsif node.target.is_a?(VarField) && - [Const, CVar, GVar].include?(node.target.value.class) - opassign_defined(node) - else - branchif = nil - - with_opassign(node) do - builder.dup - branchif = builder.branchif(-1) - builder.pop - visit(node.value) - end - - if node.target.is_a?(ARefField) - builder.leave - branchif[1] = builder.label - builder.setn(3) - builder.adjuststack(3) - else - branchif[1] = builder.label - end - end - else - with_opassign(node) do - visit(node.value) - builder.send(operator, 1, flag) - end - end - end - - def visit_params(node) - argument_options = current_iseq.argument_options - - if node.requireds.any? - argument_options[:lead_num] = 0 - - node.requireds.each do |required| - current_iseq.local_table.plain(required.value.to_sym) - current_iseq.argument_size += 1 - argument_options[:lead_num] += 1 - end - end - - node.optionals.each do |(optional, value)| - index = current_iseq.local_table.size - name = optional.value.to_sym - - current_iseq.local_table.plain(name) - current_iseq.argument_size += 1 - - unless argument_options.key?(:opt) - argument_options[:opt] = [builder.label] - end - - visit(value) - builder.setlocal(index, 0) - current_iseq.argument_options[:opt] << builder.label - end - - visit(node.rest) if node.rest - - if node.posts.any? - argument_options[:post_start] = current_iseq.argument_size - argument_options[:post_num] = 0 - - node.posts.each do |post| - current_iseq.local_table.plain(post.value.to_sym) - current_iseq.argument_size += 1 - argument_options[:post_num] += 1 - end - end - - if node.keywords.any? - argument_options[:kwbits] = 0 - argument_options[:keyword] = [] - checkkeywords = [] - - node.keywords.each_with_index do |(keyword, value), keyword_index| - name = keyword.value.chomp(":").to_sym - index = current_iseq.local_table.size - - current_iseq.local_table.plain(name) - current_iseq.argument_size += 1 - argument_options[:kwbits] += 1 - - if value.nil? 
- argument_options[:keyword] << name - else - begin - compiled = value.accept(RubyVisitor.new) - argument_options[:keyword] << [name, compiled] - rescue RubyVisitor::CompilationError - argument_options[:keyword] << [name] - checkkeywords << builder.checkkeyword(-1, keyword_index) - branchif = builder.branchif(-1) - visit(value) - builder.setlocal(index, 0) - branchif[1] = builder.label - end - end - end - - name = node.keyword_rest ? 3 : 2 - current_iseq.argument_size += 1 - current_iseq.local_table.plain(name) - - lookup = current_iseq.local_table.find(name, 0) - checkkeywords.each { |checkkeyword| checkkeyword[1] = lookup.index } - end - - if node.keyword_rest.is_a?(ArgsForward) - current_iseq.local_table.plain(:*) - current_iseq.local_table.plain(:&) - - current_iseq.argument_options[ - :rest_start - ] = current_iseq.argument_size - current_iseq.argument_options[ - :block_start - ] = current_iseq.argument_size + 1 - - current_iseq.argument_size += 2 - elsif node.keyword_rest - visit(node.keyword_rest) - end - - visit(node.block) if node.block - end - - def visit_paren(node) - visit(node.contents) - end - - def visit_program(node) - node.statements.body.each do |statement| - break unless statement.is_a?(Comment) - - if statement.value == "# frozen_string_literal: true" - @frozen_string_literal = true - end - end - - preexes = [] - statements = [] - - node.statements.body.each do |statement| - case statement - when Comment, EmbDoc, EndContent, VoidStmt - # ignore - when BEGINBlock - preexes << statement - else - statements << statement - end - end - - with_instruction_sequence(:top, "", nil, node) do - visit_all(preexes) - - if statements.empty? 
- builder.putnil - else - *statements, last_statement = statements - visit_all(statements) - with_last_statement { visit(last_statement) } - end - - builder.leave - end - end - - def visit_qsymbols(node) - builder.duparray(node.accept(RubyVisitor.new)) - end - - def visit_qwords(node) - if frozen_string_literal - builder.duparray(node.accept(RubyVisitor.new)) - else - visit_all(node.elements) - builder.newarray(node.elements.length) - end - end - - def visit_range(node) - builder.putobject(node.accept(RubyVisitor.new)) - rescue RubyVisitor::CompilationError - visit(node.left) - visit(node.right) - builder.newrange(node.operator.value == ".." ? 0 : 1) - end - - def visit_rational(node) - builder.putobject(node.accept(RubyVisitor.new)) - end - - def visit_regexp_literal(node) - builder.putobject(node.accept(RubyVisitor.new)) - rescue RubyVisitor::CompilationError - flags = RubyVisitor.new.visit_regexp_literal_flags(node) - length = visit_string_parts(node) - builder.toregexp(flags, length) - end - - def visit_rest_param(node) - current_iseq.local_table.plain(node.name.value.to_sym) - current_iseq.argument_options[:rest_start] = current_iseq.argument_size - current_iseq.argument_size += 1 - end - - def visit_sclass(node) - visit(node.target) - builder.putnil - - singleton_iseq = - with_instruction_sequence( - :class, - "singleton class", - current_iseq, - node - ) do - builder.event(:RUBY_EVENT_CLASS) - visit(node.bodystmt) - builder.event(:RUBY_EVENT_END) - builder.leave - end - - builder.defineclass( - :singletonclass, - singleton_iseq, - VM_DEFINECLASS_TYPE_SINGLETON_CLASS - ) - end - - def visit_statements(node) - statements = - node.body.select do |statement| - case statement - when Comment, EmbDoc, EndContent, VoidStmt - false - else - true - end - end - - statements.empty? ? 
builder.putnil : visit_all(statements) - end - - def visit_string_concat(node) - value = node.left.parts.first.value + node.right.parts.first.value - content = TStringContent.new(value: value, location: node.location) - - literal = - StringLiteral.new( - parts: [content], - quote: node.left.quote, - location: node.location - ) - visit_string_literal(literal) - end - - def visit_string_embexpr(node) - visit(node.statements) - end - - def visit_string_literal(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - visit(node.parts.first) - else - length = visit_string_parts(node) - builder.concatstrings(length) - end - end - - def visit_super(node) - builder.putself - visit(node.arguments) - builder.invokesuper( - nil, - argument_parts(node.arguments).length, - VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE | VM_CALL_SUPER, - nil - ) - end - - def visit_symbol_literal(node) - builder.putobject(node.accept(RubyVisitor.new)) - end - - def visit_symbols(node) - builder.duparray(node.accept(RubyVisitor.new)) - rescue RubyVisitor::CompilationError - node.elements.each do |element| - if element.parts.length == 1 && - element.parts.first.is_a?(TStringContent) - builder.putobject(element.parts.first.value.to_sym) - else - length = visit_string_parts(element) - builder.concatstrings(length) - builder.intern - end - end - - builder.newarray(node.elements.length) - end - - def visit_top_const_ref(node) - builder.opt_getconstant_path(constant_names(node)) - end - - def visit_tstring_content(node) - if frozen_string_literal - builder.putobject(node.accept(RubyVisitor.new)) - else - builder.putstring(node.accept(RubyVisitor.new)) - end - end - - def visit_unary(node) - method_id = - case node.operator - when "+", "-" - "#{node.operator}@" - else - node.operator - end - - visit_call( - CommandCall.new( - receiver: node.statement, - operator: nil, - message: Ident.new(value: method_id, location: Location.default), - arguments: nil, - block: nil, - location: 
Location.default - ) - ) - end - - def visit_undef(node) - node.symbols.each_with_index do |symbol, index| - builder.pop if index != 0 - builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) - builder.putspecialobject(VM_SPECIAL_OBJECT_CBASE) - visit(symbol) - builder.send(:"core#undef_method", 2, VM_CALL_ARGS_SIMPLE) - end - end - - def visit_unless(node) - visit(node.predicate) - branchunless = builder.branchunless(-1) - node.consequent ? visit(node.consequent) : builder.putnil - - if last_statement? - builder.leave - branchunless[1] = builder.label - - visit(node.statements) - else - builder.pop - - if node.consequent - jump = builder.jump(-1) - branchunless[1] = builder.label - visit(node.consequent) - jump[1] = builder.label - else - branchunless[1] = builder.label - end - end - end - - def visit_until(node) - jumps = [] - - jumps << builder.jump(-1) - builder.putnil - builder.pop - jumps << builder.jump(-1) - - label = builder.label - visit(node.statements) - builder.pop - jumps.each { |jump| jump[1] = builder.label } - - visit(node.predicate) - builder.branchunless(label) - builder.putnil if last_statement? 
- end - - def visit_var_field(node) - case node.value - when CVar, IVar - name = node.value.value.to_sym - current_iseq.inline_storage_for(name) - when Ident - name = node.value.value.to_sym - - if (local_variable = current_iseq.local_variable(name)) - local_variable - else - current_iseq.local_table.plain(name) - current_iseq.local_variable(name) - end - end - end - - def visit_var_ref(node) - case node.value - when Const - builder.opt_getconstant_path(constant_names(node)) - when CVar - name = node.value.value.to_sym - builder.getclassvariable(name) - when GVar - builder.getglobal(node.value.value.to_sym) - when Ident - lookup = current_iseq.local_variable(node.value.value.to_sym) - - case lookup.local - when LocalTable::BlockLocal - builder.getblockparam(lookup.index, lookup.level) - when LocalTable::PlainLocal - builder.getlocal(lookup.index, lookup.level) - end - when IVar - name = node.value.value.to_sym - builder.getinstancevariable(name) - when Kw - case node.value.value - when "false" - builder.putobject(false) - when "nil" - builder.putnil - when "self" - builder.putself - when "true" - builder.putobject(true) - end - end - end - - def visit_vcall(node) - builder.putself - - flag = VM_CALL_FCALL | VM_CALL_VCALL | VM_CALL_ARGS_SIMPLE - builder.send(node.value.value.to_sym, 0, flag) - end - - def visit_when(node) - visit(node.statements) - end - - def visit_while(node) - jumps = [] - - jumps << builder.jump(-1) - builder.putnil - builder.pop - jumps << builder.jump(-1) - - label = builder.label - visit(node.statements) - builder.pop - jumps.each { |jump| jump[1] = builder.label } - - visit(node.predicate) - builder.branchif(label) - builder.putnil if last_statement? 
- end - - def visit_word(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - visit(node.parts.first) - else - length = visit_string_parts(node) - builder.concatstrings(length) - end - end - - def visit_words(node) - converted = nil - - if frozen_string_literal - begin - converted = node.accept(RubyVisitor.new) - rescue RubyVisitor::CompilationError - end - end - - if converted - builder.duparray(converted) - else - visit_all(node.elements) - builder.newarray(node.elements.length) - end - end - - def visit_xstring_literal(node) - builder.putself - length = visit_string_parts(node) - builder.concatstrings(node.parts.length) if length > 1 - builder.send(:`, 1, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE) - end - - def visit_yield(node) - parts = argument_parts(node.arguments) - visit_all(parts) - builder.invokeblock(nil, parts.length, VM_CALL_ARGS_SIMPLE) - end - - def visit_zsuper(_node) - builder.putself - builder.invokesuper( - nil, - 0, - VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE | VM_CALL_SUPER | VM_CALL_ZSUPER, - nil - ) - end - - private - - # This is a helper that is used in places where arguments may be present - # or they may be wrapped in parentheses. It's meant to descend down the - # tree and return an array of argument nodes. - def argument_parts(node) - case node - when nil - [] - when Args - node.parts - when ArgParen - if node.arguments.is_a?(ArgsForward) - [node.arguments] - else - node.arguments.parts - end - when Paren - node.contents.parts - end - end - - # Constant names when they are being assigned or referenced come in as a - # tree, but it's more convenient to work with them as an array. This - # method converts them into that array. This is nice because it's the - # operand that goes to opt_getconstant_path in Ruby 3.2. 
- def constant_names(node) - current = node - names = [] - - while current.is_a?(ConstPathField) || current.is_a?(ConstPathRef) - names.unshift(current.constant.value.to_sym) - current = current.parent - end - - case current - when VarField, VarRef - names.unshift(current.value.value.to_sym) - when TopConstRef - names.unshift(current.constant.value.to_sym) - names.unshift(:"") - end - - names - end - - # For the most part when an OpAssign (operator assignment) node with a ||= - # operator is being compiled it's a matter of reading the target, checking - # if the value should be evaluated, evaluating it if so, and then writing - # the result back to the target. - # - # However, in certain kinds of assignments (X, ::X, X::Y, @@x, and $x) we - # first check if the value is defined using the defined instruction. I - # don't know why it is necessary, and suspect that it isn't. - def opassign_defined(node) - case node.target - when ConstPathField - visit(node.target.parent) - name = node.target.constant.value.to_sym - - builder.dup - builder.defined(DEFINED_CONST_FROM, name, true) - when TopConstField - name = node.target.constant.value.to_sym - - builder.putobject(Object) - builder.dup - builder.defined(DEFINED_CONST_FROM, name, true) - when VarField - name = node.target.value.value.to_sym - builder.putnil - - case node.target.value - when Const - builder.defined(DEFINED_CONST, name, true) - when CVar - builder.defined(DEFINED_CVAR, name, true) - when GVar - builder.defined(DEFINED_GVAR, name, true) - end - end - - branchunless = builder.branchunless(-1) - - case node.target - when ConstPathField, TopConstField - builder.dup - builder.putobject(true) - builder.getconstant(name) - when VarField - case node.target.value - when Const - builder.opt_getconstant_path(constant_names(node.target)) - when CVar - builder.getclassvariable(name) - when GVar - builder.getglobal(name) - end - end - - builder.dup - branchif = builder.branchif(-1) - builder.pop - - branchunless[1] = 
builder.label - visit(node.value) - - case node.target - when ConstPathField, TopConstField - builder.dupn(2) - builder.swap - builder.setconstant(name) - when VarField - builder.dup - - case node.target.value - when Const - builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) - builder.setconstant(name) - when CVar - builder.setclassvariable(name) - when GVar - builder.setglobal(name) - end - end - - branchif[1] = builder.label - end - - # Whenever a value is interpolated into a string-like structure, these - # three instructions are pushed. - def push_interpolate - builder.dup - builder.objtostring(:to_s, 0, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE) - builder.anytostring - end - - # There are a lot of nodes in the AST that act as contains of parts of - # strings. This includes things like string literals, regular expressions, - # heredocs, etc. This method will visit all the parts of a string within - # those containers. - def visit_string_parts(node) - length = 0 - - unless node.parts.first.is_a?(TStringContent) - builder.putobject("") - length += 1 - end - - node.parts.each do |part| - case part - when StringDVar - visit(part.variable) - push_interpolate - when StringEmbExpr - visit(part) - push_interpolate - when TStringContent - builder.putobject(part.accept(RubyVisitor.new)) - end - - length += 1 - end - - length - end - - # The current instruction sequence that we're compiling is always stored - # on the compiler. When we descend into a node that has its own - # instruction sequence, this method can be called to temporarily set the - # new value of the instruction sequence, yield, and then set it back. 
- def with_instruction_sequence(type, name, parent_iseq, node) - previous_iseq = current_iseq - previous_builder = builder - - begin - iseq = InstructionSequence.new(type, name, parent_iseq, node.location) - - @current_iseq = iseq - @builder = - Builder.new( - iseq, - frozen_string_literal: frozen_string_literal, - operands_unification: operands_unification, - specialized_instruction: specialized_instruction - ) - - yield - iseq - ensure - @current_iseq = previous_iseq - @builder = previous_builder - end - end - - # When we're compiling the last statement of a set of statements within a - # scope, the instructions sometimes change from pops to leaves. These - # kinds of peephole optimizations can reduce the overall number of - # instructions. Therefore, we keep track of whether we're compiling the - # last statement of a scope and allow visit methods to query that - # information. - def with_last_statement - previous = @last_statement - @last_statement = true - - begin - yield - ensure - @last_statement = previous - end - end - - def last_statement? - @last_statement - end - - # OpAssign nodes can have a number of different kinds of nodes as their - # "target" (i.e., the left-hand side of the assignment). When compiling - # these nodes we typically need to first fetch the current value of the - # variable, then perform some kind of action, then store the result back - # into the variable. This method handles that by first fetching the value, - # then yielding to the block, then storing the result. 
- def with_opassign(node) - case node.target - when ARefField - builder.putnil - visit(node.target.collection) - visit(node.target.index) - - builder.dupn(2) - builder.send(:[], 1, VM_CALL_ARGS_SIMPLE) - - yield - - builder.setn(3) - builder.send(:[]=, 2, VM_CALL_ARGS_SIMPLE) - builder.pop - when ConstPathField - name = node.target.constant.value.to_sym - - visit(node.target.parent) - builder.dup - builder.putobject(true) - builder.getconstant(name) - - yield - - if node.operator.value == "&&=" - builder.dupn(2) - else - builder.swap - builder.topn(1) - end - - builder.swap - builder.setconstant(name) - when TopConstField - name = node.target.constant.value.to_sym - - builder.putobject(Object) - builder.dup - builder.putobject(true) - builder.getconstant(name) - - yield - - if node.operator.value == "&&=" - builder.dupn(2) - else - builder.swap - builder.topn(1) - end - - builder.swap - builder.setconstant(name) - when VarField - case node.target.value - when Const - names = constant_names(node.target) - builder.opt_getconstant_path(names) - - yield - - builder.dup - builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) - builder.setconstant(names.last) - when CVar - name = node.target.value.value.to_sym - builder.getclassvariable(name) - - yield - - builder.dup - builder.setclassvariable(name) - when GVar - name = node.target.value.value.to_sym - builder.getglobal(name) - - yield - - builder.dup - builder.setglobal(name) - when Ident - local_variable = visit(node.target) - builder.getlocal(local_variable.index, local_variable.level) - - yield - - builder.dup - builder.setlocal(local_variable.index, local_variable.level) - when IVar - name = node.target.value.value.to_sym - builder.getinstancevariable(name) - - yield - - builder.dup - builder.setinstancevariable(name) - end - end - end - end - end -end diff --git a/test/compiler_test.rb b/test/compiler_test.rb index cf0667bb..cdf2860e 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -2,17 +2,9 @@ 
return if !defined?(RubyVM::InstructionSequence) || RUBY_VERSION < "3.1" require_relative "test_helper" -require "fiddle" module SyntaxTree class CompilerTest < Minitest::Test - ISEQ_LOAD = - Fiddle::Function.new( - Fiddle::Handle::DEFAULT["rb_iseq_load"], - [Fiddle::TYPE_VOIDP] * 3, - Fiddle::TYPE_VOIDP - ) - CASES = [ # Various literals placed on the stack "true", @@ -457,7 +449,7 @@ def serialize_iseq(iseq) when Array insn.map do |operand| if operand.is_a?(Array) && - operand[0] == Visitor::Compiler::InstructionSequence::MAGIC + operand[0] == Compiler::InstructionSequence::MAGIC serialize_iseq(operand) else operand @@ -478,20 +470,13 @@ def assert_compiles(source, **options) assert_equal( serialize_iseq(RubyVM::InstructionSequence.compile(source, **options)), - serialize_iseq(program.accept(Visitor::Compiler.new(**options))) + serialize_iseq(program.accept(Compiler.new(**options))) ) end def assert_evaluates(expected, source, **options) program = SyntaxTree.parse(source) - compiled = program.accept(Visitor::Compiler.new(**options)).to_a - - # Temporary hack until we get these working. 
- compiled[4][:node_id] = 11 - compiled[4][:node_ids] = [1, 0, 3, 2, 6, 7, 9, -1] - - iseq = Fiddle.dlunwrap(ISEQ_LOAD.call(Fiddle.dlwrap(compiled), 0, nil)) - assert_equal expected, iseq.eval + assert_equal expected, program.accept(Compiler.new(**options)).eval end end end From 8b836c73b7cc2c9327a7782008a301653b2848dd Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 18 Nov 2022 20:20:45 -0500 Subject: [PATCH 02/21] Split YARV out into its own file --- lib/syntax_tree.rb | 1 + lib/syntax_tree/compiler.rb | 1062 ++++------------------------------- lib/syntax_tree/yarv.rb | 838 +++++++++++++++++++++++++++ test/compiler_test.rb | 2 +- 4 files changed, 954 insertions(+), 949 deletions(-) create mode 100644 lib/syntax_tree/yarv.rb diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index c62132e6..187ff74d 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -26,6 +26,7 @@ require_relative "syntax_tree/pattern" require_relative "syntax_tree/search" +require_relative "syntax_tree/yarv" require_relative "syntax_tree/compiler" # Syntax Tree is a suite of tools built on top of the internal CRuby parser. It diff --git a/lib/syntax_tree/compiler.rb b/lib/syntax_tree/compiler.rb index d9b7e787..c936c9c1 100644 --- a/lib/syntax_tree/compiler.rb +++ b/lib/syntax_tree/compiler.rb @@ -185,839 +185,6 @@ def visit_unsupported(_node) end end - # This object is used to track the size of the stack at any given time. It - # is effectively a mini symbolic interpreter. It's necessary because when - # instruction sequences get serialized they include a :stack_max field on - # them. This field is used to determine how much stack space to allocate - # for the instruction sequence. 
- class Stack - attr_reader :current_size, :maximum_size - - def initialize - @current_size = 0 - @maximum_size = 0 - end - - def change_by(value) - @current_size += value - @maximum_size = @current_size if @current_size > @maximum_size - end - end - - # This represents every local variable associated with an instruction - # sequence. There are two kinds of locals: plain locals that are what you - # expect, and block proxy locals, which represent local variables - # associated with blocks that were passed into the current instruction - # sequence. - class LocalTable - # A local representing a block passed into the current instruction - # sequence. - class BlockLocal - attr_reader :name - - def initialize(name) - @name = name - end - end - - # A regular local variable. - class PlainLocal - attr_reader :name - - def initialize(name) - @name = name - end - end - - # The result of looking up a local variable in the current local table. - class Lookup - attr_reader :local, :index, :level - - def initialize(local, index, level) - @local = local - @index = index - @level = level - end - end - - attr_reader :locals - - def initialize - @locals = [] - end - - def find(name, level) - index = locals.index { |local| local.name == name } - Lookup.new(locals[index], index, level) if index - end - - def has?(name) - locals.any? { |local| local.name == name } - end - - def names - locals.map(&:name) - end - - def size - locals.length - end - - # Add a BlockLocal to the local table. - def block(name) - locals << BlockLocal.new(name) unless has?(name) - end - - # Add a PlainLocal to the local table. - def plain(name) - locals << PlainLocal.new(name) unless has?(name) - end - - # This is the offset from the top of the stack where this local variable - # lives. - def offset(index) - size - (index - 3) - 1 - end - end - - # This class is meant to mirror RubyVM::InstructionSequence. It contains a - # list of instructions along with the metadata pertaining to them. 
It also - # functions as a builder for the instruction sequence. - class InstructionSequence - MAGIC = "YARVInstructionSequence/SimpleDataFormat" - - # This provides a handle to the rb_iseq_load function, which allows you to - # pass a serialized iseq to Ruby and have it return a - # RubyVM::InstructionSequence object. - ISEQ_LOAD = - Fiddle::Function.new( - Fiddle::Handle::DEFAULT["rb_iseq_load"], - [Fiddle::TYPE_VOIDP] * 3, - Fiddle::TYPE_VOIDP - ) - - # The type of the instruction sequence. - attr_reader :type - - # The name of the instruction sequence. - attr_reader :name - - # The parent instruction sequence, if there is one. - attr_reader :parent_iseq - - # The location of the root node of this instruction sequence. - attr_reader :location - - # This is the list of information about the arguments to this - # instruction sequence. - attr_accessor :argument_size - attr_reader :argument_options - - # The list of instructions for this instruction sequence. - attr_reader :insns - - # The table of local variables. - attr_reader :local_table - - # The hash of names of instance and class variables pointing to the - # index of their associated inline storage. - attr_reader :inline_storages - - # The index of the next inline storage that will be created. - attr_reader :storage_index - - # An object that will track the current size of the stack and the - # maximum size of the stack for this instruction sequence. 
- attr_reader :stack - - def initialize(type, name, parent_iseq, location) - @type = type - @name = name - @parent_iseq = parent_iseq - @location = location - - @argument_size = 0 - @argument_options = {} - - @local_table = LocalTable.new - @inline_storages = {} - @insns = [] - @storage_index = 0 - @stack = Stack.new - end - - def local_variable(name, level = 0) - if (lookup = local_table.find(name, level)) - lookup - elsif parent_iseq - parent_iseq.local_variable(name, level + 1) - end - end - - def push(insn) - insns << insn - insn - end - - def inline_storage - storage = storage_index - @storage_index += 1 - storage - end - - def inline_storage_for(name) - unless inline_storages.key?(name) - inline_storages[name] = inline_storage - end - - inline_storages[name] - end - - def length - insns.inject(0) do |sum, insn| - insn.is_a?(Array) ? sum + insn.length : sum - end - end - - def each_child - insns.each do |insn| - insn[1..].each do |operand| - yield operand if operand.is_a?(InstructionSequence) - end - end - end - - def eval - compiled = to_a - - # Temporary hack until we get these working. 
- compiled[4][:node_id] = 11 - compiled[4][:node_ids] = [1, 0, 3, 2, 6, 7, 9, -1] - - Fiddle.dlunwrap(ISEQ_LOAD.call(Fiddle.dlwrap(compiled), 0, nil)).eval - end - - def to_a - versions = RUBY_VERSION.split(".").map(&:to_i) - - [ - MAGIC, - versions[0], - versions[1], - 1, - { - arg_size: argument_size, - local_size: local_table.size, - stack_max: stack.maximum_size - }, - name, - "", - "", - location.start_line, - type, - local_table.names, - argument_options, - [], - insns.map { |insn| serialize(insn) } - ] - end - - private - - def serialize(insn) - case insn[0] - when :checkkeyword, :getblockparam, :getblockparamproxy, - :getlocal_WC_0, :getlocal_WC_1, :getlocal, :setlocal_WC_0, - :setlocal_WC_1, :setlocal - iseq = self - - case insn[0] - when :getlocal_WC_1, :setlocal_WC_1 - iseq = iseq.parent_iseq - when :getblockparam, :getblockparamproxy, :getlocal, :setlocal - insn[2].times { iseq = iseq.parent_iseq } - end - - # Here we need to map the local variable index to the offset - # from the top of the stack where it will be stored. - [insn[0], iseq.local_table.offset(insn[1]), *insn[2..]] - when :defineclass - [insn[0], insn[1], insn[2].to_a, insn[3]] - when :definemethod, :definesmethod - [insn[0], insn[1], insn[2].to_a] - when :send - # For any instructions that push instruction sequences onto the - # stack, we need to call #to_a on them as well. - [insn[0], insn[1], (insn[2].to_a if insn[2])] - when :once - [insn[0], insn[1].to_a, insn[2]] - else - insn - end - end - end - - # This class serves as a layer of indirection between the instruction - # sequence and the compiler. It allows us to provide different behavior - # for certain instructions depending on the Ruby version. For example, - # class variable reads and writes gained an inline cache in Ruby 3.0. So - # we place the logic for checking the Ruby version in this class. 
- class Builder - attr_reader :iseq, :stack - attr_reader :frozen_string_literal, - :operands_unification, - :specialized_instruction - - def initialize( - iseq, - frozen_string_literal: false, - operands_unification: true, - specialized_instruction: true - ) - @iseq = iseq - @stack = iseq.stack - - @frozen_string_literal = frozen_string_literal - @operands_unification = operands_unification - @specialized_instruction = specialized_instruction - end - - # This creates a new label at the current length of the instruction - # sequence. It is used as the operand for jump instructions. - def label - name = :"label_#{iseq.length}" - iseq.insns.last == name ? name : event(name) - end - - def event(name) - iseq.push(name) - name - end - - def adjuststack(number) - stack.change_by(-number) - iseq.push([:adjuststack, number]) - end - - def anytostring - stack.change_by(-2 + 1) - iseq.push([:anytostring]) - end - - def branchif(index) - stack.change_by(-1) - iseq.push([:branchif, index]) - end - - def branchnil(index) - stack.change_by(-1) - iseq.push([:branchnil, index]) - end - - def branchunless(index) - stack.change_by(-1) - iseq.push([:branchunless, index]) - end - - def checkkeyword(index, keyword_index) - stack.change_by(+1) - iseq.push([:checkkeyword, index, keyword_index]) - end - - def concatarray - stack.change_by(-2 + 1) - iseq.push([:concatarray]) - end - - def concatstrings(number) - stack.change_by(-number + 1) - iseq.push([:concatstrings, number]) - end - - def defined(type, name, message) - stack.change_by(-1 + 1) - iseq.push([:defined, type, name, message]) - end - - def defineclass(name, class_iseq, flags) - stack.change_by(-2 + 1) - iseq.push([:defineclass, name, class_iseq, flags]) - end - - def definemethod(name, method_iseq) - stack.change_by(0) - iseq.push([:definemethod, name, method_iseq]) - end - - def definesmethod(name, method_iseq) - stack.change_by(-1) - iseq.push([:definesmethod, name, method_iseq]) - end - - def dup - stack.change_by(-1 + 2) 
- iseq.push([:dup]) - end - - def duparray(object) - stack.change_by(+1) - iseq.push([:duparray, object]) - end - - def duphash(object) - stack.change_by(+1) - iseq.push([:duphash, object]) - end - - def dupn(number) - stack.change_by(+number) - iseq.push([:dupn, number]) - end - - def expandarray(length, flag) - stack.change_by(-1 + length) - iseq.push([:expandarray, length, flag]) - end - - def getblockparam(index, level) - stack.change_by(+1) - iseq.push([:getblockparam, index, level]) - end - - def getblockparamproxy(index, level) - stack.change_by(+1) - iseq.push([:getblockparamproxy, index, level]) - end - - def getclassvariable(name) - stack.change_by(+1) - - if RUBY_VERSION >= "3.0" - iseq.push([:getclassvariable, name, iseq.inline_storage_for(name)]) - else - iseq.push([:getclassvariable, name]) - end - end - - def getconstant(name) - stack.change_by(-2 + 1) - iseq.push([:getconstant, name]) - end - - def getglobal(name) - stack.change_by(+1) - iseq.push([:getglobal, name]) - end - - def getinstancevariable(name) - stack.change_by(+1) - - if RUBY_VERSION >= "3.2" - iseq.push([:getinstancevariable, name, iseq.inline_storage]) - else - inline_storage = iseq.inline_storage_for(name) - iseq.push([:getinstancevariable, name, inline_storage]) - end - end - - def getlocal(index, level) - stack.change_by(+1) - - if operands_unification - # Specialize the getlocal instruction based on the level of the - # local variable. If it's 0 or 1, then there's a specialized - # instruction that will look at the current scope or the parent - # scope, respectively, and requires fewer operands. 
- case level - when 0 - iseq.push([:getlocal_WC_0, index]) - when 1 - iseq.push([:getlocal_WC_1, index]) - else - iseq.push([:getlocal, index, level]) - end - else - iseq.push([:getlocal, index, level]) - end - end - - def getspecial(key, type) - stack.change_by(-0 + 1) - iseq.push([:getspecial, key, type]) - end - - def intern - stack.change_by(-1 + 1) - iseq.push([:intern]) - end - - def invokeblock(method_id, argc, flag) - stack.change_by(-argc + 1) - iseq.push([:invokeblock, call_data(method_id, argc, flag)]) - end - - def invokesuper(method_id, argc, flag, block_iseq) - stack.change_by(-(argc + 1) + 1) - - cdata = call_data(method_id, argc, flag) - iseq.push([:invokesuper, cdata, block_iseq]) - end - - def jump(index) - stack.change_by(0) - iseq.push([:jump, index]) - end - - def leave - stack.change_by(-1) - iseq.push([:leave]) - end - - def newarray(length) - stack.change_by(-length + 1) - iseq.push([:newarray, length]) - end - - def newhash(length) - stack.change_by(-length + 1) - iseq.push([:newhash, length]) - end - - def newrange(flag) - stack.change_by(-2 + 1) - iseq.push([:newrange, flag]) - end - - def nop - stack.change_by(0) - iseq.push([:nop]) - end - - def objtostring(method_id, argc, flag) - stack.change_by(-1 + 1) - iseq.push([:objtostring, call_data(method_id, argc, flag)]) - end - - def once(postexe_iseq, inline_storage) - stack.change_by(+1) - iseq.push([:once, postexe_iseq, inline_storage]) - end - - def opt_getconstant_path(names) - if RUBY_VERSION >= "3.2" - stack.change_by(+1) - iseq.push([:opt_getconstant_path, names]) - else - inline_storage = iseq.inline_storage - getinlinecache = opt_getinlinecache(-1, inline_storage) - - if names[0] == :"" - names.shift - pop - putobject(Object) - end - - names.each_with_index do |name, index| - putobject(index == 0) - getconstant(name) - end - - opt_setinlinecache(inline_storage) - getinlinecache[1] = label - end - end - - def opt_getinlinecache(offset, inline_storage) - stack.change_by(+1) - 
iseq.push([:opt_getinlinecache, offset, inline_storage]) - end - - def opt_newarray_max(length) - if specialized_instruction - stack.change_by(-length + 1) - iseq.push([:opt_newarray_max, length]) - else - newarray(length) - send(:max, 0, VM_CALL_ARGS_SIMPLE) - end - end - - def opt_newarray_min(length) - if specialized_instruction - stack.change_by(-length + 1) - iseq.push([:opt_newarray_min, length]) - else - newarray(length) - send(:min, 0, VM_CALL_ARGS_SIMPLE) - end - end - - def opt_setinlinecache(inline_storage) - stack.change_by(-1 + 1) - iseq.push([:opt_setinlinecache, inline_storage]) - end - - def opt_str_freeze(value) - if specialized_instruction - stack.change_by(+1) - iseq.push( - [ - :opt_str_freeze, - value, - call_data(:freeze, 0, VM_CALL_ARGS_SIMPLE) - ] - ) - else - putstring(value) - send(:freeze, 0, VM_CALL_ARGS_SIMPLE) - end - end - - def opt_str_uminus(value) - if specialized_instruction - stack.change_by(+1) - iseq.push( - [:opt_str_uminus, value, call_data(:-@, 0, VM_CALL_ARGS_SIMPLE)] - ) - else - putstring(value) - send(:-@, 0, VM_CALL_ARGS_SIMPLE) - end - end - - def pop - stack.change_by(-1) - iseq.push([:pop]) - end - - def putnil - stack.change_by(+1) - iseq.push([:putnil]) - end - - def putobject(object) - stack.change_by(+1) - - if operands_unification - # Specialize the putobject instruction based on the value of the - # object. If it's 0 or 1, then there's a specialized instruction - # that will push the object onto the stack and requires fewer - # operands. 
- if object.eql?(0) - iseq.push([:putobject_INT2FIX_0_]) - elsif object.eql?(1) - iseq.push([:putobject_INT2FIX_1_]) - else - iseq.push([:putobject, object]) - end - else - iseq.push([:putobject, object]) - end - end - - def putself - stack.change_by(+1) - iseq.push([:putself]) - end - - def putspecialobject(object) - stack.change_by(+1) - iseq.push([:putspecialobject, object]) - end - - def putstring(object) - stack.change_by(+1) - iseq.push([:putstring, object]) - end - - def send(method_id, argc, flag, block_iseq = nil) - stack.change_by(-(argc + 1) + 1) - cdata = call_data(method_id, argc, flag) - - if specialized_instruction - # Specialize the send instruction. If it doesn't have a block - # attached, then we will replace it with an opt_send_without_block - # and do further specializations based on the called method and the - # number of arguments. - - # stree-ignore - if !block_iseq && (flag & VM_CALL_ARGS_BLOCKARG) == 0 - case [method_id, argc] - when [:length, 0] then iseq.push([:opt_length, cdata]) - when [:size, 0] then iseq.push([:opt_size, cdata]) - when [:empty?, 0] then iseq.push([:opt_empty_p, cdata]) - when [:nil?, 0] then iseq.push([:opt_nil_p, cdata]) - when [:succ, 0] then iseq.push([:opt_succ, cdata]) - when [:!, 0] then iseq.push([:opt_not, cdata]) - when [:+, 1] then iseq.push([:opt_plus, cdata]) - when [:-, 1] then iseq.push([:opt_minus, cdata]) - when [:*, 1] then iseq.push([:opt_mult, cdata]) - when [:/, 1] then iseq.push([:opt_div, cdata]) - when [:%, 1] then iseq.push([:opt_mod, cdata]) - when [:==, 1] then iseq.push([:opt_eq, cdata]) - when [:=~, 1] then iseq.push([:opt_regexpmatch2, cdata]) - when [:<, 1] then iseq.push([:opt_lt, cdata]) - when [:<=, 1] then iseq.push([:opt_le, cdata]) - when [:>, 1] then iseq.push([:opt_gt, cdata]) - when [:>=, 1] then iseq.push([:opt_ge, cdata]) - when [:<<, 1] then iseq.push([:opt_ltlt, cdata]) - when [:[], 1] then iseq.push([:opt_aref, cdata]) - when [:&, 1] then iseq.push([:opt_and, cdata]) - when 
[:|, 1] then iseq.push([:opt_or, cdata]) - when [:[]=, 2] then iseq.push([:opt_aset, cdata]) - when [:!=, 1] - eql_data = call_data(:==, 1, VM_CALL_ARGS_SIMPLE) - iseq.push([:opt_neq, eql_data, cdata]) - else - iseq.push([:opt_send_without_block, cdata]) - end - else - iseq.push([:send, cdata, block_iseq]) - end - else - iseq.push([:send, cdata, block_iseq]) - end - end - - def setclassvariable(name) - stack.change_by(-1) - - if RUBY_VERSION >= "3.0" - iseq.push([:setclassvariable, name, iseq.inline_storage_for(name)]) - else - iseq.push([:setclassvariable, name]) - end - end - - def setconstant(name) - stack.change_by(-2) - iseq.push([:setconstant, name]) - end - - def setglobal(name) - stack.change_by(-1) - iseq.push([:setglobal, name]) - end - - def setinstancevariable(name) - stack.change_by(-1) - - if RUBY_VERSION >= "3.2" - iseq.push([:setinstancevariable, name, iseq.inline_storage]) - else - inline_storage = iseq.inline_storage_for(name) - iseq.push([:setinstancevariable, name, inline_storage]) - end - end - - def setlocal(index, level) - stack.change_by(-1) - - if operands_unification - # Specialize the setlocal instruction based on the level of the - # local variable. If it's 0 or 1, then there's a specialized - # instruction that will write to the current scope or the parent - # scope, respectively, and requires fewer operands. 
- case level - when 0 - iseq.push([:setlocal_WC_0, index]) - when 1 - iseq.push([:setlocal_WC_1, index]) - else - iseq.push([:setlocal, index, level]) - end - else - iseq.push([:setlocal, index, level]) - end - end - - def setn(number) - stack.change_by(-1 + 1) - iseq.push([:setn, number]) - end - - def splatarray(flag) - stack.change_by(-1 + 1) - iseq.push([:splatarray, flag]) - end - - def swap - stack.change_by(-2 + 2) - iseq.push([:swap]) - end - - def topn(number) - stack.change_by(+1) - iseq.push([:topn, number]) - end - - def toregexp(options, length) - stack.change_by(-length + 1) - iseq.push([:toregexp, options, length]) - end - - private - - # This creates a call data object that is used as the operand for the - # send, invokesuper, and objtostring instructions. - def call_data(method_id, argc, flag) - { mid: method_id, flag: flag, orig_argc: argc } - end - end - - # These constants correspond to the putspecialobject instruction. They are - # used to represent special objects that are pushed onto the stack. - VM_SPECIAL_OBJECT_VMCORE = 1 - VM_SPECIAL_OBJECT_CBASE = 2 - VM_SPECIAL_OBJECT_CONST_BASE = 3 - - # These constants correspond to the flag passed as part of the call data - # structure on the send instruction. They are used to represent various - # metadata about the callsite (e.g., were keyword arguments used?, was a - # block given?, etc.). - VM_CALL_ARGS_SPLAT = 1 << 0 - VM_CALL_ARGS_BLOCKARG = 1 << 1 - VM_CALL_FCALL = 1 << 2 - VM_CALL_VCALL = 1 << 3 - VM_CALL_ARGS_SIMPLE = 1 << 4 - VM_CALL_BLOCKISEQ = 1 << 5 - VM_CALL_KWARG = 1 << 6 - VM_CALL_KW_SPLAT = 1 << 7 - VM_CALL_TAILCALL = 1 << 8 - VM_CALL_SUPER = 1 << 9 - VM_CALL_ZSUPER = 1 << 10 - VM_CALL_OPT_SEND = 1 << 11 - VM_CALL_KW_SPLAT_MUT = 1 << 12 - - # These constants correspond to the value passed as part of the defined - # instruction. It's an enum defined in the CRuby codebase that tells that - # instruction what kind of defined check to perform. 
- DEFINED_NIL = 1 - DEFINED_IVAR = 2 - DEFINED_LVAR = 3 - DEFINED_GVAR = 4 - DEFINED_CVAR = 5 - DEFINED_CONST = 6 - DEFINED_METHOD = 7 - DEFINED_YIELD = 8 - DEFINED_ZSUPER = 9 - DEFINED_SELF = 10 - DEFINED_TRUE = 11 - DEFINED_FALSE = 12 - DEFINED_ASGN = 13 - DEFINED_EXPR = 14 - DEFINED_REF = 15 - DEFINED_FUNC = 16 - DEFINED_CONST_FROM = 17 - - # These constants correspond to the value passed in the flags as part of - # the defineclass instruction. - VM_DEFINECLASS_TYPE_CLASS = 0 - VM_DEFINECLASS_TYPE_SINGLETON_CLASS = 1 - VM_DEFINECLASS_TYPE_MODULE = 2 - VM_DEFINECLASS_FLAG_SCOPED = 8 - VM_DEFINECLASS_FLAG_HAS_SUPERCLASS = 16 - # These options mirror the compilation options that we currently support # that can be also passed to RubyVM::InstructionSequence.compile. attr_reader :frozen_string_literal, @@ -1074,8 +241,8 @@ def visit_END(node) builder.leave end - builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) - builder.send(:"core#set_postexe", 0, VM_CALL_FCALL, postexe_iseq) + builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) + builder.send(:"core#set_postexe", 0, YARV::VM_CALL_FCALL, postexe_iseq) builder.leave end @@ -1084,17 +251,17 @@ def visit_END(node) end def visit_alias(node) - builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) - builder.putspecialobject(VM_SPECIAL_OBJECT_CBASE) + builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) + builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_CBASE) visit(node.left) visit(node.right) - builder.send(:"core#set_method_alias", 3, VM_CALL_ARGS_SIMPLE) + builder.send(:"core#set_method_alias", 3, YARV::VM_CALL_ARGS_SIMPLE) end def visit_aref(node) visit(node.collection) visit(node.index) - builder.send(:[], 1, VM_CALL_ARGS_SIMPLE) + builder.send(:[], 1, YARV::VM_CALL_ARGS_SIMPLE) end def visit_arg_block(node) @@ -1150,7 +317,7 @@ def visit_assign(node) visit(node.target.index) visit(node.value) builder.setn(3) - builder.send(:[]=, 2, VM_CALL_ARGS_SIMPLE) + builder.send(:[]=, 2, YARV::VM_CALL_ARGS_SIMPLE) 
builder.pop when ConstPathField names = constant_names(node.target) @@ -1174,7 +341,7 @@ def visit_assign(node) visit(node.target) visit(node.value) builder.setn(2) - builder.send(:"#{node.target.name.value}=", 1, VM_CALL_ARGS_SIMPLE) + builder.send(:"#{node.target.name.value}=", 1, YARV::VM_CALL_ARGS_SIMPLE) builder.pop when TopConstField name = node.target.constant.value.to_sym @@ -1198,7 +365,7 @@ def visit_assign(node) case node.target.value when Const - builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) + builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) builder.setconstant(node.target.value.value.to_sym) when CVar builder.setclassvariable(node.target.value.value.to_sym) @@ -1257,7 +424,7 @@ def visit_binary(node) else visit(node.left) visit(node.right) - builder.send(node.operator, 1, VM_CALL_ARGS_SIMPLE) + builder.send(node.operator, 1, YARV::VM_CALL_ARGS_SIMPLE) end end @@ -1357,12 +524,14 @@ def visit_call(node) end if node.receiver - if node.receiver.is_a?(VarRef) && - ( - lookup = - current_iseq.local_variable(node.receiver.value.value.to_sym) - ) && lookup.local.is_a?(LocalTable::BlockLocal) - builder.getblockparamproxy(lookup.index, lookup.level) + if node.receiver.is_a?(VarRef) + lookup = current_iseq.local_variable(node.receiver.value.value.to_sym) + + if lookup.local.is_a?(YARV::LocalTable::BlockLocal) + builder.getblockparamproxy(lookup.index, lookup.level) + else + visit(node.receiver) + end else visit(node.receiver) end @@ -1382,13 +551,13 @@ def visit_call(node) case arg_part when ArgBlock argc -= 1 - flag |= VM_CALL_ARGS_BLOCKARG + flag |= YARV::VM_CALL_ARGS_BLOCKARG visit(arg_part) when ArgStar - flag |= VM_CALL_ARGS_SPLAT + flag |= YARV::VM_CALL_ARGS_SPLAT visit(arg_part) when ArgsForward - flag |= VM_CALL_ARGS_SPLAT | VM_CALL_ARGS_BLOCKARG + flag |= YARV::VM_CALL_ARGS_SPLAT | YARV::VM_CALL_ARGS_BLOCKARG lookup = current_iseq.local_table.find(:*, 0) builder.getlocal(lookup.index, lookup.level) @@ -1397,7 +566,7 @@ def 
visit_call(node) lookup = current_iseq.local_table.find(:&, 0) builder.getblockparamproxy(lookup.index, lookup.level) when BareAssocHash - flag |= VM_CALL_KW_SPLAT + flag |= YARV::VM_CALL_KW_SPLAT visit(arg_part) else visit(arg_part) @@ -1405,8 +574,8 @@ def visit_call(node) end block_iseq = visit(node.block) if node.block - flag |= VM_CALL_ARGS_SIMPLE if block_iseq.nil? && flag == 0 - flag |= VM_CALL_FCALL if node.receiver.nil? + flag |= YARV::VM_CALL_ARGS_SIMPLE if block_iseq.nil? && flag == 0 + flag |= YARV::VM_CALL_FCALL if node.receiver.nil? builder.send(node.message.value.to_sym, argc, flag, block_iseq) branchnil[1] = builder.label if branchnil @@ -1433,7 +602,7 @@ def visit_case(node) clauses.map do |clause| visit(clause.arguments) builder.topn(1) - builder.send(:===, 1, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE) + builder.send(:===, 1, YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE) [clause, builder.branchif(:label_00)] end @@ -1466,21 +635,21 @@ def visit_class(node) builder.leave end - flags = VM_DEFINECLASS_TYPE_CLASS + flags = YARV::VM_DEFINECLASS_TYPE_CLASS case node.constant when ConstPathRef - flags |= VM_DEFINECLASS_FLAG_SCOPED + flags |= YARV::VM_DEFINECLASS_FLAG_SCOPED visit(node.constant.parent) when ConstRef - builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) + builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) when TopConstRef - flags |= VM_DEFINECLASS_FLAG_SCOPED + flags |= YARV::VM_DEFINECLASS_FLAG_SCOPED builder.putobject(Object) end if node.superclass - flags |= VM_DEFINECLASS_FLAG_HAS_SUPERCLASS + flags |= YARV::VM_DEFINECLASS_FLAG_HAS_SUPERCLASS visit(node.superclass) else builder.putnil @@ -1569,18 +738,18 @@ def visit_defined(node) case value when Const builder.putnil - builder.defined(DEFINED_CONST, name, "constant") + builder.defined(YARV::DEFINED_CONST, name, "constant") when CVar builder.putnil - builder.defined(DEFINED_CVAR, name, "class variable") + builder.defined(YARV::DEFINED_CVAR, name, "class variable") when GVar 
builder.putnil - builder.defined(DEFINED_GVAR, name, "global-variable") + builder.defined(YARV::DEFINED_GVAR, name, "global-variable") when Ident builder.putobject("local-variable") when IVar builder.putnil - builder.defined(DEFINED_IVAR, name, "instance-variable") + builder.defined(YARV::DEFINED_IVAR, name, "instance-variable") when Kw case name when :false @@ -1597,13 +766,13 @@ def visit_defined(node) builder.putself name = node.value.value.value.to_sym - builder.defined(DEFINED_FUNC, name, "method") + builder.defined(YARV::DEFINED_FUNC, name, "method") when YieldNode builder.putnil - builder.defined(DEFINED_YIELD, false, "yield") + builder.defined(YARV::DEFINED_YIELD, false, "yield") when ZSuper builder.putnil - builder.defined(DEFINED_ZSUPER, false, "super") + builder.defined(YARV::DEFINED_ZSUPER, false, "super") else builder.putobject("expression") end @@ -1676,10 +845,12 @@ def visit_for(node) end def visit_hash(node) - builder.duphash(node.accept(RubyVisitor.new)) - rescue RubyVisitor::CompilationError - visit_all(node.assocs) - builder.newhash(node.assocs.length * 2) + if (compiled = RubyVisitor.compile(node)) + builder.duphash(compiled) + else + visit_all(node.assocs) + builder.newhash(node.assocs.length * 2) + end end def visit_heredoc(node) @@ -1766,8 +937,8 @@ def visit_lambda(node) builder.leave end - builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) - builder.send(:lambda, 0, VM_CALL_FCALL, lambda_iseq) + builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) + builder.send(:lambda, 0, YARV::VM_CALL_FCALL, lambda_iseq) end def visit_lambda_var(node) @@ -1823,16 +994,16 @@ def visit_module(node) builder.leave end - flags = VM_DEFINECLASS_TYPE_MODULE + flags = YARV::VM_DEFINECLASS_TYPE_MODULE case node.constant when ConstPathRef - flags |= VM_DEFINECLASS_FLAG_SCOPED + flags |= YARV::VM_DEFINECLASS_FLAG_SCOPED visit(node.constant.parent) when ConstRef - builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) + 
builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) when TopConstRef - flags |= VM_DEFINECLASS_FLAG_SCOPED + flags |= YARV::VM_DEFINECLASS_FLAG_SCOPED builder.putobject(Object) end @@ -1851,13 +1022,13 @@ def visit_mrhs(node) def visit_not(node) visit(node.statement) - builder.send(:!, 0, VM_CALL_ARGS_SIMPLE) + builder.send(:!, 0, YARV::VM_CALL_ARGS_SIMPLE) end def visit_opassign(node) - flag = VM_CALL_ARGS_SIMPLE + flag = YARV::VM_CALL_ARGS_SIMPLE if node.target.is_a?(ConstPathField) || node.target.is_a?(TopConstField) - flag |= VM_CALL_FCALL + flag |= YARV::VM_CALL_FCALL end case (operator = node.operator.value.chomp("=").to_sym) @@ -1977,18 +1148,16 @@ def visit_params(node) if value.nil? argument_options[:keyword] << name + elsif (compiled = RubyVisitor.compile(value)) + compiled = value.accept(RubyVisitor.new) + argument_options[:keyword] << [name, compiled] else - begin - compiled = value.accept(RubyVisitor.new) - argument_options[:keyword] << [name, compiled] - rescue RubyVisitor::CompilationError - argument_options[:keyword] << [name] - checkkeywords << builder.checkkeyword(-1, keyword_index) - branchif = builder.branchif(-1) - visit(value) - builder.setlocal(index, 0) - branchif[1] = builder.label - end + argument_options[:keyword] << [name] + checkkeywords << builder.checkkeyword(-1, keyword_index) + branchif = builder.branchif(-1) + visit(value) + builder.setlocal(index, 0) + branchif[1] = builder.label end end @@ -2075,11 +1244,13 @@ def visit_qwords(node) end def visit_range(node) - builder.putobject(node.accept(RubyVisitor.new)) - rescue RubyVisitor::CompilationError - visit(node.left) - visit(node.right) - builder.newrange(node.operator.value == ".." ? 0 : 1) + if (compiled = RubyVisitor.compile(node)) + builder.putobject(compiled) + else + visit(node.left) + visit(node.right) + builder.newrange(node.operator.value == ".." ? 
0 : 1) + end end def visit_rational(node) @@ -2087,11 +1258,13 @@ def visit_rational(node) end def visit_regexp_literal(node) - builder.putobject(node.accept(RubyVisitor.new)) - rescue RubyVisitor::CompilationError - flags = RubyVisitor.new.visit_regexp_literal_flags(node) - length = visit_string_parts(node) - builder.toregexp(flags, length) + if (compiled = RubyVisitor.compile(node)) + builder.putobject(compiled) + else + flags = RubyVisitor.new.visit_regexp_literal_flags(node) + length = visit_string_parts(node) + builder.toregexp(flags, length) + end end def visit_rest_param(node) @@ -2120,7 +1293,7 @@ def visit_sclass(node) builder.defineclass( :singletonclass, singleton_iseq, - VM_DEFINECLASS_TYPE_SINGLETON_CLASS + YARV::VM_DEFINECLASS_TYPE_SINGLETON_CLASS ) end @@ -2170,7 +1343,7 @@ def visit_super(node) builder.invokesuper( nil, argument_parts(node.arguments).length, - VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE | VM_CALL_SUPER, + YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE | YARV::VM_CALL_SUPER, nil ) end @@ -2180,20 +1353,22 @@ def visit_symbol_literal(node) end def visit_symbols(node) - builder.duparray(node.accept(RubyVisitor.new)) - rescue RubyVisitor::CompilationError - node.elements.each do |element| - if element.parts.length == 1 && - element.parts.first.is_a?(TStringContent) - builder.putobject(element.parts.first.value.to_sym) - else - length = visit_string_parts(element) - builder.concatstrings(length) - builder.intern + if (compiled = RubyVisitor.compile(node)) + builder.duparray(compiled) + else + node.elements.each do |element| + if element.parts.length == 1 && + element.parts.first.is_a?(TStringContent) + builder.putobject(element.parts.first.value.to_sym) + else + length = visit_string_parts(element) + builder.concatstrings(length) + builder.intern + end end - end - builder.newarray(node.elements.length) + builder.newarray(node.elements.length) + end end def visit_top_const_ref(node) @@ -2232,10 +1407,10 @@ def visit_unary(node) def 
visit_undef(node) node.symbols.each_with_index do |symbol, index| builder.pop if index != 0 - builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) - builder.putspecialobject(VM_SPECIAL_OBJECT_CBASE) + builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) + builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_CBASE) visit(symbol) - builder.send(:"core#undef_method", 2, VM_CALL_ARGS_SIMPLE) + builder.send(:"core#undef_method", 2, YARV::VM_CALL_ARGS_SIMPLE) end end @@ -2311,9 +1486,9 @@ def visit_var_ref(node) lookup = current_iseq.local_variable(node.value.value.to_sym) case lookup.local - when LocalTable::BlockLocal + when YARV::LocalTable::BlockLocal builder.getblockparam(lookup.index, lookup.level) - when LocalTable::PlainLocal + when YARV::LocalTable::PlainLocal builder.getlocal(lookup.index, lookup.level) end when IVar @@ -2336,7 +1511,7 @@ def visit_var_ref(node) def visit_vcall(node) builder.putself - flag = VM_CALL_FCALL | VM_CALL_VCALL | VM_CALL_ARGS_SIMPLE + flag = YARV::VM_CALL_FCALL | YARV::VM_CALL_VCALL | YARV::VM_CALL_ARGS_SIMPLE builder.send(node.value.value.to_sym, 0, flag) end @@ -2372,17 +1547,8 @@ def visit_word(node) end def visit_words(node) - converted = nil - - if frozen_string_literal - begin - converted = node.accept(RubyVisitor.new) - rescue RubyVisitor::CompilationError - end - end - - if converted - builder.duparray(converted) + if frozen_string_literal && (compiled = RubyVisitor.compile(node)) + builder.duparray(compiled) else visit_all(node.elements) builder.newarray(node.elements.length) @@ -2393,13 +1559,13 @@ def visit_xstring_literal(node) builder.putself length = visit_string_parts(node) builder.concatstrings(node.parts.length) if length > 1 - builder.send(:`, 1, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE) + builder.send(:`, 1, YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE) end def visit_yield(node) parts = argument_parts(node.arguments) visit_all(parts) - builder.invokeblock(nil, parts.length, VM_CALL_ARGS_SIMPLE) + 
builder.invokeblock(nil, parts.length, YARV::VM_CALL_ARGS_SIMPLE) end def visit_zsuper(_node) @@ -2407,7 +1573,7 @@ def visit_zsuper(_node) builder.invokesuper( nil, 0, - VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE | VM_CALL_SUPER | VM_CALL_ZSUPER, + YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE | YARV::VM_CALL_SUPER | YARV::VM_CALL_ZSUPER, nil ) end @@ -2473,24 +1639,24 @@ def opassign_defined(node) name = node.target.constant.value.to_sym builder.dup - builder.defined(DEFINED_CONST_FROM, name, true) + builder.defined(YARV::DEFINED_CONST_FROM, name, true) when TopConstField name = node.target.constant.value.to_sym builder.putobject(Object) builder.dup - builder.defined(DEFINED_CONST_FROM, name, true) + builder.defined(YARV::DEFINED_CONST_FROM, name, true) when VarField name = node.target.value.value.to_sym builder.putnil case node.target.value when Const - builder.defined(DEFINED_CONST, name, true) + builder.defined(YARV::DEFINED_CONST, name, true) when CVar - builder.defined(DEFINED_CVAR, name, true) + builder.defined(YARV::DEFINED_CVAR, name, true) when GVar - builder.defined(DEFINED_GVAR, name, true) + builder.defined(YARV::DEFINED_GVAR, name, true) end end @@ -2529,7 +1695,7 @@ def opassign_defined(node) case node.target.value when Const - builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) + builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) builder.setconstant(name) when CVar builder.setclassvariable(name) @@ -2545,7 +1711,7 @@ def opassign_defined(node) # three instructions are pushed. 
def push_interpolate builder.dup - builder.objtostring(:to_s, 0, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE) + builder.objtostring(:to_s, 0, YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE) builder.anytostring end @@ -2588,11 +1754,11 @@ def with_instruction_sequence(type, name, parent_iseq, node) previous_builder = builder begin - iseq = InstructionSequence.new(type, name, parent_iseq, node.location) + iseq = YARV::InstructionSequence.new(type, name, parent_iseq, node.location) @current_iseq = iseq @builder = - Builder.new( + YARV::Builder.new( iseq, frozen_string_literal: frozen_string_literal, operands_unification: operands_unification, @@ -2642,12 +1808,12 @@ def with_opassign(node) visit(node.target.index) builder.dupn(2) - builder.send(:[], 1, VM_CALL_ARGS_SIMPLE) + builder.send(:[], 1, YARV::VM_CALL_ARGS_SIMPLE) yield builder.setn(3) - builder.send(:[]=, 2, VM_CALL_ARGS_SIMPLE) + builder.send(:[]=, 2, YARV::VM_CALL_ARGS_SIMPLE) builder.pop when ConstPathField name = node.target.constant.value.to_sym @@ -2696,7 +1862,7 @@ def with_opassign(node) yield builder.dup - builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) + builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) builder.setconstant(names.last) when CVar name = node.target.value.value.to_sym diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb new file mode 100644 index 00000000..42faa66b --- /dev/null +++ b/lib/syntax_tree/yarv.rb @@ -0,0 +1,838 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This object is used to track the size of the stack at any given time. It + # is effectively a mini symbolic interpreter. It's necessary because when + # instruction sequences get serialized they include a :stack_max field on + # them. This field is used to determine how much stack space to allocate + # for the instruction sequence. 
+ class Stack + attr_reader :current_size, :maximum_size + + def initialize + @current_size = 0 + @maximum_size = 0 + end + + def change_by(value) + @current_size += value + @maximum_size = @current_size if @current_size > @maximum_size + end + end + + # This represents every local variable associated with an instruction + # sequence. There are two kinds of locals: plain locals that are what you + # expect, and block proxy locals, which represent local variables + # associated with blocks that were passed into the current instruction + # sequence. + class LocalTable + # A local representing a block passed into the current instruction + # sequence. + class BlockLocal + attr_reader :name + + def initialize(name) + @name = name + end + end + + # A regular local variable. + class PlainLocal + attr_reader :name + + def initialize(name) + @name = name + end + end + + # The result of looking up a local variable in the current local table. + class Lookup + attr_reader :local, :index, :level + + def initialize(local, index, level) + @local = local + @index = index + @level = level + end + end + + attr_reader :locals + + def initialize + @locals = [] + end + + def find(name, level) + index = locals.index { |local| local.name == name } + Lookup.new(locals[index], index, level) if index + end + + def has?(name) + locals.any? { |local| local.name == name } + end + + def names + locals.map(&:name) + end + + def size + locals.length + end + + # Add a BlockLocal to the local table. + def block(name) + locals << BlockLocal.new(name) unless has?(name) + end + + # Add a PlainLocal to the local table. + def plain(name) + locals << PlainLocal.new(name) unless has?(name) + end + + # This is the offset from the top of the stack where this local variable + # lives. + def offset(index) + size - (index - 3) - 1 + end + end + + # This class is meant to mirror RubyVM::InstructionSequence. It contains a + # list of instructions along with the metadata pertaining to them. 
It also + # functions as a builder for the instruction sequence. + class InstructionSequence + MAGIC = "YARVInstructionSequence/SimpleDataFormat" + + # This provides a handle to the rb_iseq_load function, which allows you to + # pass a serialized iseq to Ruby and have it return a + # RubyVM::InstructionSequence object. + ISEQ_LOAD = + Fiddle::Function.new( + Fiddle::Handle::DEFAULT["rb_iseq_load"], + [Fiddle::TYPE_VOIDP] * 3, + Fiddle::TYPE_VOIDP + ) + + # The type of the instruction sequence. + attr_reader :type + + # The name of the instruction sequence. + attr_reader :name + + # The parent instruction sequence, if there is one. + attr_reader :parent_iseq + + # The location of the root node of this instruction sequence. + attr_reader :location + + # This is the list of information about the arguments to this + # instruction sequence. + attr_accessor :argument_size + attr_reader :argument_options + + # The list of instructions for this instruction sequence. + attr_reader :insns + + # The table of local variables. + attr_reader :local_table + + # The hash of names of instance and class variables pointing to the + # index of their associated inline storage. + attr_reader :inline_storages + + # The index of the next inline storage that will be created. + attr_reader :storage_index + + # An object that will track the current size of the stack and the + # maximum size of the stack for this instruction sequence. 
+ attr_reader :stack + + def initialize(type, name, parent_iseq, location) + @type = type + @name = name + @parent_iseq = parent_iseq + @location = location + + @argument_size = 0 + @argument_options = {} + + @local_table = LocalTable.new + @inline_storages = {} + @insns = [] + @storage_index = 0 + @stack = Stack.new + end + + def local_variable(name, level = 0) + if (lookup = local_table.find(name, level)) + lookup + elsif parent_iseq + parent_iseq.local_variable(name, level + 1) + end + end + + def push(insn) + insns << insn + insn + end + + def inline_storage + storage = storage_index + @storage_index += 1 + storage + end + + def inline_storage_for(name) + unless inline_storages.key?(name) + inline_storages[name] = inline_storage + end + + inline_storages[name] + end + + def length + insns.inject(0) do |sum, insn| + insn.is_a?(Array) ? sum + insn.length : sum + end + end + + def each_child + insns.each do |insn| + insn[1..].each do |operand| + yield operand if operand.is_a?(InstructionSequence) + end + end + end + + def eval + compiled = to_a + + # Temporary hack until we get these working. 
+ compiled[4][:node_id] = 11 + compiled[4][:node_ids] = [1, 0, 3, 2, 6, 7, 9, -1] + + Fiddle.dlunwrap(ISEQ_LOAD.call(Fiddle.dlwrap(compiled), 0, nil)).eval + end + + def to_a + versions = RUBY_VERSION.split(".").map(&:to_i) + + [ + MAGIC, + versions[0], + versions[1], + 1, + { + arg_size: argument_size, + local_size: local_table.size, + stack_max: stack.maximum_size + }, + name, + "", + "", + location.start_line, + type, + local_table.names, + argument_options, + [], + insns.map { |insn| serialize(insn) } + ] + end + + private + + def serialize(insn) + case insn[0] + when :checkkeyword, :getblockparam, :getblockparamproxy, + :getlocal_WC_0, :getlocal_WC_1, :getlocal, :setlocal_WC_0, + :setlocal_WC_1, :setlocal + iseq = self + + case insn[0] + when :getlocal_WC_1, :setlocal_WC_1 + iseq = iseq.parent_iseq + when :getblockparam, :getblockparamproxy, :getlocal, :setlocal + insn[2].times { iseq = iseq.parent_iseq } + end + + # Here we need to map the local variable index to the offset + # from the top of the stack where it will be stored. + [insn[0], iseq.local_table.offset(insn[1]), *insn[2..]] + when :defineclass + [insn[0], insn[1], insn[2].to_a, insn[3]] + when :definemethod, :definesmethod + [insn[0], insn[1], insn[2].to_a] + when :send + # For any instructions that push instruction sequences onto the + # stack, we need to call #to_a on them as well. + [insn[0], insn[1], (insn[2].to_a if insn[2])] + when :once + [insn[0], insn[1].to_a, insn[2]] + else + insn + end + end + end + + # This class serves as a layer of indirection between the instruction + # sequence and the compiler. It allows us to provide different behavior + # for certain instructions depending on the Ruby version. For example, + # class variable reads and writes gained an inline cache in Ruby 3.0. So + # we place the logic for checking the Ruby version in this class. 
+ class Builder + attr_reader :iseq, :stack + attr_reader :frozen_string_literal, + :operands_unification, + :specialized_instruction + + def initialize( + iseq, + frozen_string_literal: false, + operands_unification: true, + specialized_instruction: true + ) + @iseq = iseq + @stack = iseq.stack + + @frozen_string_literal = frozen_string_literal + @operands_unification = operands_unification + @specialized_instruction = specialized_instruction + end + + # This creates a new label at the current length of the instruction + # sequence. It is used as the operand for jump instructions. + def label + name = :"label_#{iseq.length}" + iseq.insns.last == name ? name : event(name) + end + + def event(name) + iseq.push(name) + name + end + + def adjuststack(number) + stack.change_by(-number) + iseq.push([:adjuststack, number]) + end + + def anytostring + stack.change_by(-2 + 1) + iseq.push([:anytostring]) + end + + def branchif(index) + stack.change_by(-1) + iseq.push([:branchif, index]) + end + + def branchnil(index) + stack.change_by(-1) + iseq.push([:branchnil, index]) + end + + def branchunless(index) + stack.change_by(-1) + iseq.push([:branchunless, index]) + end + + def checkkeyword(index, keyword_index) + stack.change_by(+1) + iseq.push([:checkkeyword, index, keyword_index]) + end + + def concatarray + stack.change_by(-2 + 1) + iseq.push([:concatarray]) + end + + def concatstrings(number) + stack.change_by(-number + 1) + iseq.push([:concatstrings, number]) + end + + def defined(type, name, message) + stack.change_by(-1 + 1) + iseq.push([:defined, type, name, message]) + end + + def defineclass(name, class_iseq, flags) + stack.change_by(-2 + 1) + iseq.push([:defineclass, name, class_iseq, flags]) + end + + def definemethod(name, method_iseq) + stack.change_by(0) + iseq.push([:definemethod, name, method_iseq]) + end + + def definesmethod(name, method_iseq) + stack.change_by(-1) + iseq.push([:definesmethod, name, method_iseq]) + end + + def dup + stack.change_by(-1 + 2) 
+ iseq.push([:dup]) + end + + def duparray(object) + stack.change_by(+1) + iseq.push([:duparray, object]) + end + + def duphash(object) + stack.change_by(+1) + iseq.push([:duphash, object]) + end + + def dupn(number) + stack.change_by(+number) + iseq.push([:dupn, number]) + end + + def expandarray(length, flag) + stack.change_by(-1 + length) + iseq.push([:expandarray, length, flag]) + end + + def getblockparam(index, level) + stack.change_by(+1) + iseq.push([:getblockparam, index, level]) + end + + def getblockparamproxy(index, level) + stack.change_by(+1) + iseq.push([:getblockparamproxy, index, level]) + end + + def getclassvariable(name) + stack.change_by(+1) + + if RUBY_VERSION >= "3.0" + iseq.push([:getclassvariable, name, iseq.inline_storage_for(name)]) + else + iseq.push([:getclassvariable, name]) + end + end + + def getconstant(name) + stack.change_by(-2 + 1) + iseq.push([:getconstant, name]) + end + + def getglobal(name) + stack.change_by(+1) + iseq.push([:getglobal, name]) + end + + def getinstancevariable(name) + stack.change_by(+1) + + if RUBY_VERSION >= "3.2" + iseq.push([:getinstancevariable, name, iseq.inline_storage]) + else + inline_storage = iseq.inline_storage_for(name) + iseq.push([:getinstancevariable, name, inline_storage]) + end + end + + def getlocal(index, level) + stack.change_by(+1) + + if operands_unification + # Specialize the getlocal instruction based on the level of the + # local variable. If it's 0 or 1, then there's a specialized + # instruction that will look at the current scope or the parent + # scope, respectively, and requires fewer operands. 
+ case level + when 0 + iseq.push([:getlocal_WC_0, index]) + when 1 + iseq.push([:getlocal_WC_1, index]) + else + iseq.push([:getlocal, index, level]) + end + else + iseq.push([:getlocal, index, level]) + end + end + + def getspecial(key, type) + stack.change_by(-0 + 1) + iseq.push([:getspecial, key, type]) + end + + def intern + stack.change_by(-1 + 1) + iseq.push([:intern]) + end + + def invokeblock(method_id, argc, flag) + stack.change_by(-argc + 1) + iseq.push([:invokeblock, call_data(method_id, argc, flag)]) + end + + def invokesuper(method_id, argc, flag, block_iseq) + stack.change_by(-(argc + 1) + 1) + + cdata = call_data(method_id, argc, flag) + iseq.push([:invokesuper, cdata, block_iseq]) + end + + def jump(index) + stack.change_by(0) + iseq.push([:jump, index]) + end + + def leave + stack.change_by(-1) + iseq.push([:leave]) + end + + def newarray(length) + stack.change_by(-length + 1) + iseq.push([:newarray, length]) + end + + def newhash(length) + stack.change_by(-length + 1) + iseq.push([:newhash, length]) + end + + def newrange(flag) + stack.change_by(-2 + 1) + iseq.push([:newrange, flag]) + end + + def nop + stack.change_by(0) + iseq.push([:nop]) + end + + def objtostring(method_id, argc, flag) + stack.change_by(-1 + 1) + iseq.push([:objtostring, call_data(method_id, argc, flag)]) + end + + def once(postexe_iseq, inline_storage) + stack.change_by(+1) + iseq.push([:once, postexe_iseq, inline_storage]) + end + + def opt_getconstant_path(names) + if RUBY_VERSION >= "3.2" + stack.change_by(+1) + iseq.push([:opt_getconstant_path, names]) + else + inline_storage = iseq.inline_storage + getinlinecache = opt_getinlinecache(-1, inline_storage) + + if names[0] == :"" + names.shift + pop + putobject(Object) + end + + names.each_with_index do |name, index| + putobject(index == 0) + getconstant(name) + end + + opt_setinlinecache(inline_storage) + getinlinecache[1] = label + end + end + + def opt_getinlinecache(offset, inline_storage) + stack.change_by(+1) + 
iseq.push([:opt_getinlinecache, offset, inline_storage]) + end + + def opt_newarray_max(length) + if specialized_instruction + stack.change_by(-length + 1) + iseq.push([:opt_newarray_max, length]) + else + newarray(length) + send(:max, 0, VM_CALL_ARGS_SIMPLE) + end + end + + def opt_newarray_min(length) + if specialized_instruction + stack.change_by(-length + 1) + iseq.push([:opt_newarray_min, length]) + else + newarray(length) + send(:min, 0, VM_CALL_ARGS_SIMPLE) + end + end + + def opt_setinlinecache(inline_storage) + stack.change_by(-1 + 1) + iseq.push([:opt_setinlinecache, inline_storage]) + end + + def opt_str_freeze(value) + if specialized_instruction + stack.change_by(+1) + iseq.push( + [ + :opt_str_freeze, + value, + call_data(:freeze, 0, VM_CALL_ARGS_SIMPLE) + ] + ) + else + putstring(value) + send(:freeze, 0, VM_CALL_ARGS_SIMPLE) + end + end + + def opt_str_uminus(value) + if specialized_instruction + stack.change_by(+1) + iseq.push( + [:opt_str_uminus, value, call_data(:-@, 0, VM_CALL_ARGS_SIMPLE)] + ) + else + putstring(value) + send(:-@, 0, VM_CALL_ARGS_SIMPLE) + end + end + + def pop + stack.change_by(-1) + iseq.push([:pop]) + end + + def putnil + stack.change_by(+1) + iseq.push([:putnil]) + end + + def putobject(object) + stack.change_by(+1) + + if operands_unification + # Specialize the putobject instruction based on the value of the + # object. If it's 0 or 1, then there's a specialized instruction + # that will push the object onto the stack and requires fewer + # operands. 
+ if object.eql?(0) + iseq.push([:putobject_INT2FIX_0_]) + elsif object.eql?(1) + iseq.push([:putobject_INT2FIX_1_]) + else + iseq.push([:putobject, object]) + end + else + iseq.push([:putobject, object]) + end + end + + def putself + stack.change_by(+1) + iseq.push([:putself]) + end + + def putspecialobject(object) + stack.change_by(+1) + iseq.push([:putspecialobject, object]) + end + + def putstring(object) + stack.change_by(+1) + iseq.push([:putstring, object]) + end + + def send(method_id, argc, flag, block_iseq = nil) + stack.change_by(-(argc + 1) + 1) + cdata = call_data(method_id, argc, flag) + + if specialized_instruction + # Specialize the send instruction. If it doesn't have a block + # attached, then we will replace it with an opt_send_without_block + # and do further specializations based on the called method and the + # number of arguments. + + # stree-ignore + if !block_iseq && (flag & VM_CALL_ARGS_BLOCKARG) == 0 + case [method_id, argc] + when [:length, 0] then iseq.push([:opt_length, cdata]) + when [:size, 0] then iseq.push([:opt_size, cdata]) + when [:empty?, 0] then iseq.push([:opt_empty_p, cdata]) + when [:nil?, 0] then iseq.push([:opt_nil_p, cdata]) + when [:succ, 0] then iseq.push([:opt_succ, cdata]) + when [:!, 0] then iseq.push([:opt_not, cdata]) + when [:+, 1] then iseq.push([:opt_plus, cdata]) + when [:-, 1] then iseq.push([:opt_minus, cdata]) + when [:*, 1] then iseq.push([:opt_mult, cdata]) + when [:/, 1] then iseq.push([:opt_div, cdata]) + when [:%, 1] then iseq.push([:opt_mod, cdata]) + when [:==, 1] then iseq.push([:opt_eq, cdata]) + when [:=~, 1] then iseq.push([:opt_regexpmatch2, cdata]) + when [:<, 1] then iseq.push([:opt_lt, cdata]) + when [:<=, 1] then iseq.push([:opt_le, cdata]) + when [:>, 1] then iseq.push([:opt_gt, cdata]) + when [:>=, 1] then iseq.push([:opt_ge, cdata]) + when [:<<, 1] then iseq.push([:opt_ltlt, cdata]) + when [:[], 1] then iseq.push([:opt_aref, cdata]) + when [:&, 1] then iseq.push([:opt_and, cdata]) + when 
[:|, 1] then iseq.push([:opt_or, cdata]) + when [:[]=, 2] then iseq.push([:opt_aset, cdata]) + when [:!=, 1] + eql_data = call_data(:==, 1, VM_CALL_ARGS_SIMPLE) + iseq.push([:opt_neq, eql_data, cdata]) + else + iseq.push([:opt_send_without_block, cdata]) + end + else + iseq.push([:send, cdata, block_iseq]) + end + else + iseq.push([:send, cdata, block_iseq]) + end + end + + def setclassvariable(name) + stack.change_by(-1) + + if RUBY_VERSION >= "3.0" + iseq.push([:setclassvariable, name, iseq.inline_storage_for(name)]) + else + iseq.push([:setclassvariable, name]) + end + end + + def setconstant(name) + stack.change_by(-2) + iseq.push([:setconstant, name]) + end + + def setglobal(name) + stack.change_by(-1) + iseq.push([:setglobal, name]) + end + + def setinstancevariable(name) + stack.change_by(-1) + + if RUBY_VERSION >= "3.2" + iseq.push([:setinstancevariable, name, iseq.inline_storage]) + else + inline_storage = iseq.inline_storage_for(name) + iseq.push([:setinstancevariable, name, inline_storage]) + end + end + + def setlocal(index, level) + stack.change_by(-1) + + if operands_unification + # Specialize the setlocal instruction based on the level of the + # local variable. If it's 0 or 1, then there's a specialized + # instruction that will write to the current scope or the parent + # scope, respectively, and requires fewer operands. 
+ case level + when 0 + iseq.push([:setlocal_WC_0, index]) + when 1 + iseq.push([:setlocal_WC_1, index]) + else + iseq.push([:setlocal, index, level]) + end + else + iseq.push([:setlocal, index, level]) + end + end + + def setn(number) + stack.change_by(-1 + 1) + iseq.push([:setn, number]) + end + + def splatarray(flag) + stack.change_by(-1 + 1) + iseq.push([:splatarray, flag]) + end + + def swap + stack.change_by(-2 + 2) + iseq.push([:swap]) + end + + def topn(number) + stack.change_by(+1) + iseq.push([:topn, number]) + end + + def toregexp(options, length) + stack.change_by(-length + 1) + iseq.push([:toregexp, options, length]) + end + + private + + # This creates a call data object that is used as the operand for the + # send, invokesuper, and objtostring instructions. + def call_data(method_id, argc, flag) + { mid: method_id, flag: flag, orig_argc: argc } + end + end + + # These constants correspond to the putspecialobject instruction. They are + # used to represent special objects that are pushed onto the stack. + VM_SPECIAL_OBJECT_VMCORE = 1 + VM_SPECIAL_OBJECT_CBASE = 2 + VM_SPECIAL_OBJECT_CONST_BASE = 3 + + # These constants correspond to the flag passed as part of the call data + # structure on the send instruction. They are used to represent various + # metadata about the callsite (e.g., were keyword arguments used?, was a + # block given?, etc.). + VM_CALL_ARGS_SPLAT = 1 << 0 + VM_CALL_ARGS_BLOCKARG = 1 << 1 + VM_CALL_FCALL = 1 << 2 + VM_CALL_VCALL = 1 << 3 + VM_CALL_ARGS_SIMPLE = 1 << 4 + VM_CALL_BLOCKISEQ = 1 << 5 + VM_CALL_KWARG = 1 << 6 + VM_CALL_KW_SPLAT = 1 << 7 + VM_CALL_TAILCALL = 1 << 8 + VM_CALL_SUPER = 1 << 9 + VM_CALL_ZSUPER = 1 << 10 + VM_CALL_OPT_SEND = 1 << 11 + VM_CALL_KW_SPLAT_MUT = 1 << 12 + + # These constants correspond to the value passed as part of the defined + # instruction. It's an enum defined in the CRuby codebase that tells that + # instruction what kind of defined check to perform. 
+ DEFINED_NIL = 1 + DEFINED_IVAR = 2 + DEFINED_LVAR = 3 + DEFINED_GVAR = 4 + DEFINED_CVAR = 5 + DEFINED_CONST = 6 + DEFINED_METHOD = 7 + DEFINED_YIELD = 8 + DEFINED_ZSUPER = 9 + DEFINED_SELF = 10 + DEFINED_TRUE = 11 + DEFINED_FALSE = 12 + DEFINED_ASGN = 13 + DEFINED_EXPR = 14 + DEFINED_REF = 15 + DEFINED_FUNC = 16 + DEFINED_CONST_FROM = 17 + + # These constants correspond to the value passed in the flags as part of + # the defineclass instruction. + VM_DEFINECLASS_TYPE_CLASS = 0 + VM_DEFINECLASS_TYPE_SINGLETON_CLASS = 1 + VM_DEFINECLASS_TYPE_MODULE = 2 + VM_DEFINECLASS_FLAG_SCOPED = 8 + VM_DEFINECLASS_FLAG_HAS_SUPERCLASS = 16 + end +end diff --git a/test/compiler_test.rb b/test/compiler_test.rb index cdf2860e..3b8c0ea2 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -449,7 +449,7 @@ def serialize_iseq(iseq) when Array insn.map do |operand| if operand.is_a?(Array) && - operand[0] == Compiler::InstructionSequence::MAGIC + operand[0] == YARV::InstructionSequence::MAGIC serialize_iseq(operand) else operand From 6c6b88b1f4eeb5f43164d6eb81c5c8272dbd4315 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 18 Nov 2022 20:30:07 -0500 Subject: [PATCH 03/21] Start the disassembler --- lib/syntax_tree/yarv.rb | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index 42faa66b..e3780a0c 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -274,6 +274,43 @@ def serialize(insn) end end + # This class is responsible for taking a compiled instruction sequence and + # walking through it to generate equivalent Ruby code. 
+ class Disassembler + attr_reader :iseq + + def initialize(iseq) + @iseq = iseq + end + + def to_ruby + stack = [] + + iseq.insns.each do |insn| + case insn[0] + when :leave + stack << ReturnNode.new(arguments: Args.new(parts: [stack.pop], location: Location.default), location: Location.default) + when :opt_plus + left, right = stack.pop(2) + stack << Binary.new(left: left, operator: :+, right: right, location: Location.default) + when :putobject + case insn[1] + when Integer + stack << Int.new(value: insn[1].inspect, location: Location.default) + else + raise "Unknown object type: #{insn[1].class.name}" + end + when :putobject_INT2FIX_1_ + stack << Int.new(value: "1", location: Location.default) + else + raise "Unknown instruction #{insn[0]}" + end + end + + Statements.new(nil, body: stack, location: Location.default) + end + end + # This class serves as a layer of indirection between the instruction # sequence and the compiler. It allows us to provide different behavior # for certain instructions depending on the Ruby version. 
For example, From c9db96bc925c10d80e530a3238ce50980aa57f3f Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 18 Nov 2022 20:32:52 -0500 Subject: [PATCH 04/21] opt_mult, Float, and Rational --- lib/syntax_tree/yarv.rb | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index e3780a0c..cbb91f1e 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -290,13 +290,20 @@ def to_ruby case insn[0] when :leave stack << ReturnNode.new(arguments: Args.new(parts: [stack.pop], location: Location.default), location: Location.default) + when :opt_mult + left, right = stack.pop(2) + stack << Binary.new(left: left, operator: :*, right: right, location: Location.default) when :opt_plus left, right = stack.pop(2) stack << Binary.new(left: left, operator: :+, right: right, location: Location.default) when :putobject case insn[1] + when Float + stack << FloatLiteral.new(value: insn[1].inspect, location: Location.default) when Integer stack << Int.new(value: insn[1].inspect, location: Location.default) + when Rational + stack << RationalLiteral.new(value: insn[1].inspect, location: Location.default) else raise "Unknown object type: #{insn[1].class.name}" end From 8ad799ad2dfb73ec90b8a8def55b7c088fc45bed Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 18 Nov 2022 20:37:27 -0500 Subject: [PATCH 05/21] Local variables and assignments --- lib/syntax_tree/yarv.rb | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index cbb91f1e..7290d87f 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -288,6 +288,9 @@ def to_ruby iseq.insns.each do |insn| case insn[0] + when :getlocal_WC_0 + value = iseq.local_table.locals[insn[1]].name.to_s + stack << VarRef.new(value: Ident.new(value: value, location: Location.default), location: Location.default) when :leave stack << ReturnNode.new(arguments: Args.new(parts: [stack.pop], location: Location.default), 
location: Location.default) when :opt_mult @@ -309,6 +312,9 @@ def to_ruby end when :putobject_INT2FIX_1_ stack << Int.new(value: "1", location: Location.default) + when :setlocal_WC_0 + target = VarField.new(value: Ident.new(value: iseq.local_table.locals[insn[1]].name.to_s, location: Location.default), location: Location.default) + stack << Assign.new(target: target, value: stack.pop, location: Location.default) else raise "Unknown instruction #{insn[0]}" end From 0047065d4227b141e0d9d17542696b5adb75e12b Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sat, 19 Nov 2022 14:48:55 -0500 Subject: [PATCH 06/21] Inline builder into ISeq --- lib/syntax_tree/compiler.rb | 912 +++++++++++++++++------------------- lib/syntax_tree/yarv.rb | 485 ++++++++++--------- 2 files changed, 687 insertions(+), 710 deletions(-) diff --git a/lib/syntax_tree/compiler.rb b/lib/syntax_tree/compiler.rb index c936c9c1..424a9cf5 100644 --- a/lib/syntax_tree/compiler.rb +++ b/lib/syntax_tree/compiler.rb @@ -192,11 +192,7 @@ def visit_unsupported(_node) :specialized_instruction # The current instruction sequence that is being compiled. - attr_reader :current_iseq - - # This is the current builder that is being used to construct the current - # instruction sequence. - attr_reader :builder + attr_reader :iseq # A boolean to track if we're currently compiling the last statement # within a set of statements. 
This information is necessary to determine @@ -212,8 +208,7 @@ def initialize( @operands_unification = operands_unification @specialized_instruction = specialized_instruction - @current_iseq = nil - @builder = nil + @iseq = nil @last_statement = false end @@ -223,45 +218,45 @@ def visit_BEGIN(node) def visit_CHAR(node) if frozen_string_literal - builder.putobject(node.value[1..]) + iseq.putobject(node.value[1..]) else - builder.putstring(node.value[1..]) + iseq.putstring(node.value[1..]) end end def visit_END(node) - name = "block in #{current_iseq.name}" + name = "block in #{iseq.name}" once_iseq = - with_instruction_sequence(:block, name, current_iseq, node) do + with_instruction_sequence(:block, name, node) do postexe_iseq = - with_instruction_sequence(:block, name, current_iseq, node) do + with_instruction_sequence(:block, name, node) do *statements, last_statement = node.statements.body visit_all(statements) with_last_statement { visit(last_statement) } - builder.leave + iseq.leave end - builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) - builder.send(:"core#set_postexe", 0, YARV::VM_CALL_FCALL, postexe_iseq) - builder.leave + iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) + iseq.send(:"core#set_postexe", 0, YARV::VM_CALL_FCALL, postexe_iseq) + iseq.leave end - builder.once(once_iseq, current_iseq.inline_storage) - builder.pop + iseq.once(once_iseq, iseq.inline_storage) + iseq.pop end def visit_alias(node) - builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) - builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_CBASE) + iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) + iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CBASE) visit(node.left) visit(node.right) - builder.send(:"core#set_method_alias", 3, YARV::VM_CALL_ARGS_SIMPLE) + iseq.send(:"core#set_method_alias", 3, YARV::VM_CALL_ARGS_SIMPLE) end def visit_aref(node) visit(node.collection) visit(node.index) - builder.send(:[], 1, YARV::VM_CALL_ARGS_SIMPLE) + iseq.send(:[], 1, 
YARV::VM_CALL_ARGS_SIMPLE) end def visit_arg_block(node) @@ -274,7 +269,7 @@ def visit_arg_paren(node) def visit_arg_star(node) visit(node.value) - builder.splatarray(false) + iseq.splatarray(false) end def visit_args(node) @@ -283,99 +278,97 @@ def visit_args(node) def visit_array(node) if (compiled = RubyVisitor.compile(node)) - builder.duparray(compiled) + iseq.duparray(compiled) else length = 0 node.contents.parts.each do |part| if part.is_a?(ArgStar) if length > 0 - builder.newarray(length) + iseq.newarray(length) length = 0 end visit(part.value) - builder.concatarray + iseq.concatarray else visit(part) length += 1 end end - builder.newarray(length) if length > 0 - if length > 0 && length != node.contents.parts.length - builder.concatarray - end + iseq.newarray(length) if length > 0 + iseq.concatarray if length > 0 && length != node.contents.parts.length end end def visit_assign(node) case node.target when ARefField - builder.putnil + iseq.putnil visit(node.target.collection) visit(node.target.index) visit(node.value) - builder.setn(3) - builder.send(:[]=, 2, YARV::VM_CALL_ARGS_SIMPLE) - builder.pop + iseq.setn(3) + iseq.send(:[]=, 2, YARV::VM_CALL_ARGS_SIMPLE) + iseq.pop when ConstPathField names = constant_names(node.target) name = names.pop if RUBY_VERSION >= "3.2" - builder.opt_getconstant_path(names) + iseq.opt_getconstant_path(names) visit(node.value) - builder.swap - builder.topn(1) - builder.swap - builder.setconstant(name) + iseq.swap + iseq.topn(1) + iseq.swap + iseq.setconstant(name) else visit(node.value) - builder.dup if last_statement? - builder.opt_getconstant_path(names) - builder.setconstant(name) + iseq.dup if last_statement? 
+ iseq.opt_getconstant_path(names) + iseq.setconstant(name) end when Field - builder.putnil + iseq.putnil visit(node.target) visit(node.value) - builder.setn(2) - builder.send(:"#{node.target.name.value}=", 1, YARV::VM_CALL_ARGS_SIMPLE) - builder.pop + iseq.setn(2) + iseq.send(:"#{node.target.name.value}=", 1, YARV::VM_CALL_ARGS_SIMPLE) + iseq.pop when TopConstField name = node.target.constant.value.to_sym if RUBY_VERSION >= "3.2" - builder.putobject(Object) + iseq.putobject(Object) visit(node.value) - builder.swap - builder.topn(1) - builder.swap - builder.setconstant(name) + iseq.swap + iseq.topn(1) + iseq.swap + iseq.setconstant(name) else visit(node.value) - builder.dup if last_statement? - builder.putobject(Object) - builder.setconstant(name) + iseq.dup if last_statement? + iseq.putobject(Object) + iseq.setconstant(name) end when VarField visit(node.value) - builder.dup if last_statement? + iseq.dup if last_statement? case node.target.value when Const - builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) - builder.setconstant(node.target.value.value.to_sym) + iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) + iseq.setconstant(node.target.value.value.to_sym) when CVar - builder.setclassvariable(node.target.value.value.to_sym) + iseq.setclassvariable(node.target.value.value.to_sym) when GVar - builder.setglobal(node.target.value.value.to_sym) + iseq.setglobal(node.target.value.value.to_sym) when Ident local_variable = visit(node.target) - builder.setlocal(local_variable.index, local_variable.level) + iseq.setlocal(local_variable.index, local_variable.level) when IVar - builder.setinstancevariable(node.target.value.value.to_sym) + iseq.setinstancevariable(node.target.value.value.to_sym) end end end @@ -390,12 +383,12 @@ def visit_assoc_splat(node) end def visit_backref(node) - builder.getspecial(1, 2 * node.value[1..].to_i) + iseq.getspecial(1, 2 * node.value[1..].to_i) end def visit_bare_assoc_hash(node) if (compiled = 
RubyVisitor.compile(node)) - builder.duphash(compiled) + iseq.duphash(compiled) else visit_all(node.assocs) end @@ -405,41 +398,36 @@ def visit_binary(node) case node.operator when :"&&" visit(node.left) - builder.dup + iseq.dup - branchunless = builder.branchunless(-1) - builder.pop + branchunless = iseq.branchunless(-1) + iseq.pop visit(node.right) - branchunless[1] = builder.label + branchunless[1] = iseq.label when :"||" visit(node.left) - builder.dup + iseq.dup - branchif = builder.branchif(-1) - builder.pop + branchif = iseq.branchif(-1) + iseq.pop visit(node.right) - branchif[1] = builder.label + branchif[1] = iseq.label else visit(node.left) visit(node.right) - builder.send(node.operator, 1, YARV::VM_CALL_ARGS_SIMPLE) + iseq.send(node.operator, 1, YARV::VM_CALL_ARGS_SIMPLE) end end def visit_block(node) - with_instruction_sequence( - :block, - "block in #{current_iseq.name}", - current_iseq, - node - ) do - builder.event(:RUBY_EVENT_B_CALL) + with_instruction_sequence(:block, "block in #{iseq.name}", node) do + iseq.event(:RUBY_EVENT_B_CALL) visit(node.block_var) visit(node.bodystmt) - builder.event(:RUBY_EVENT_B_RETURN) - builder.leave + iseq.event(:RUBY_EVENT_B_RETURN) + iseq.leave end end @@ -447,22 +435,20 @@ def visit_block_var(node) params = node.params if params.requireds.length == 1 && params.optionals.empty? && - !params.rest && params.posts.empty? && params.keywords.empty? && - !params.keyword_rest && !params.block - current_iseq.argument_options[:ambiguous_param0] = true + !params.rest && params.posts.empty? && params.keywords.empty? 
&& + !params.keyword_rest && !params.block + iseq.argument_options[:ambiguous_param0] = true end visit(node.params) - node.locals.each do |local| - current_iseq.local_table.plain(local.value.to_sym) - end + node.locals.each { |local| iseq.local_table.plain(local.value.to_sym) } end def visit_blockarg(node) - current_iseq.argument_options[:block_start] = current_iseq.argument_size - current_iseq.local_table.block(node.name.value.to_sym) - current_iseq.argument_size += 1 + iseq.argument_options[:block_start] = iseq.argument_size + iseq.local_table.block(node.name.value.to_sym) + iseq.argument_size += 1 end def visit_bodystmt(node) @@ -497,15 +483,15 @@ def visit_call(node) parts = node.receiver.contents&.parts || [] if parts.none? { |part| part.is_a?(ArgStar) } && - RubyVisitor.compile(node.receiver).nil? + RubyVisitor.compile(node.receiver).nil? case node.message.value when "max" visit(node.receiver.contents) - builder.opt_newarray_max(parts.length) + iseq.opt_newarray_max(parts.length) return when "min" visit(node.receiver.contents) - builder.opt_newarray_min(parts.length) + iseq.opt_newarray_min(parts.length) return end end @@ -513,10 +499,10 @@ def visit_call(node) if RubyVisitor.compile(node.receiver).nil? 
case node.message.value when "-@" - builder.opt_str_uminus(node.receiver.parts.first.value) + iseq.opt_str_uminus(node.receiver.parts.first.value) return when "freeze" - builder.opt_str_freeze(node.receiver.parts.first.value) + iseq.opt_str_freeze(node.receiver.parts.first.value) return end end @@ -525,10 +511,10 @@ def visit_call(node) if node.receiver if node.receiver.is_a?(VarRef) - lookup = current_iseq.local_variable(node.receiver.value.value.to_sym) + lookup = iseq.local_variable(node.receiver.value.value.to_sym) if lookup.local.is_a?(YARV::LocalTable::BlockLocal) - builder.getblockparamproxy(lookup.index, lookup.level) + iseq.getblockparamproxy(lookup.index, lookup.level) else visit(node.receiver) end @@ -536,13 +522,13 @@ def visit_call(node) visit(node.receiver) end else - builder.putself + iseq.putself end branchnil = if node.operator&.value == "&." - builder.dup - builder.branchnil(-1) + iseq.dup + iseq.branchnil(-1) end flag = 0 @@ -559,12 +545,12 @@ def visit_call(node) when ArgsForward flag |= YARV::VM_CALL_ARGS_SPLAT | YARV::VM_CALL_ARGS_BLOCKARG - lookup = current_iseq.local_table.find(:*, 0) - builder.getlocal(lookup.index, lookup.level) - builder.splatarray(arg_parts.length != 1) + lookup = iseq.local_table.find(:*, 0) + iseq.getlocal(lookup.index, lookup.level) + iseq.splatarray(arg_parts.length != 1) - lookup = current_iseq.local_table.find(:&, 0) - builder.getblockparamproxy(lookup.index, lookup.level) + lookup = iseq.local_table.find(:&, 0) + iseq.getblockparamproxy(lookup.index, lookup.level) when BareAssocHash flag |= YARV::VM_CALL_KW_SPLAT visit(arg_part) @@ -577,8 +563,8 @@ def visit_call(node) flag |= YARV::VM_CALL_ARGS_SIMPLE if block_iseq.nil? && flag == 0 flag |= YARV::VM_CALL_FCALL if node.receiver.nil? 
- builder.send(node.message.value.to_sym, argc, flag, block_iseq) - branchnil[1] = builder.label if branchnil + iseq.send(node.message.value.to_sym, argc, flag, block_iseq) + branchnil[1] = iseq.label if branchnil end def visit_case(node) @@ -586,7 +572,6 @@ def visit_case(node) clauses = [] else_clause = nil - current = node.consequent while current @@ -601,21 +586,19 @@ def visit_case(node) branches = clauses.map do |clause| visit(clause.arguments) - builder.topn(1) - builder.send(:===, 1, YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE) - [clause, builder.branchif(:label_00)] + iseq.topn(1) + iseq.send(:===, 1, YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE) + [clause, iseq.branchif(:label_00)] end - builder.pop - - else_clause ? visit(else_clause) : builder.putnil - - builder.leave + iseq.pop + else_clause ? visit(else_clause) : iseq.putnil + iseq.leave branches.each_with_index do |(clause, branchif), index| - builder.leave if index != 0 - branchif[1] = builder.label - builder.pop + iseq.leave if index != 0 + branchif[1] = iseq.label + iseq.pop visit(clause) end end @@ -623,16 +606,11 @@ def visit_case(node) def visit_class(node) name = node.constant.constant.value.to_sym class_iseq = - with_instruction_sequence( - :class, - "", - current_iseq, - node - ) do - builder.event(:RUBY_EVENT_CLASS) + with_instruction_sequence(:class, "", node) do + iseq.event(:RUBY_EVENT_CLASS) visit(node.bodystmt) - builder.event(:RUBY_EVENT_END) - builder.leave + iseq.event(:RUBY_EVENT_END) + iseq.leave end flags = YARV::VM_DEFINECLASS_TYPE_CLASS @@ -642,20 +620,20 @@ def visit_class(node) flags |= YARV::VM_DEFINECLASS_FLAG_SCOPED visit(node.constant.parent) when ConstRef - builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) + iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) when TopConstRef flags |= YARV::VM_DEFINECLASS_FLAG_SCOPED - builder.putobject(Object) + iseq.putobject(Object) end if node.superclass flags |= YARV::VM_DEFINECLASS_FLAG_HAS_SUPERCLASS 
visit(node.superclass) else - builder.putnil + iseq.putnil end - builder.defineclass(name, class_iseq, flags) + iseq.defineclass(name, class_iseq, flags) end def visit_command(node) @@ -690,34 +668,29 @@ def visit_const_path_field(node) def visit_const_path_ref(node) names = constant_names(node) - builder.opt_getconstant_path(names) + iseq.opt_getconstant_path(names) end def visit_def(node) method_iseq = - with_instruction_sequence( - :method, - node.name.value, - current_iseq, - node - ) do + with_instruction_sequence(:method, node.name.value, node) do visit(node.params) if node.params - builder.event(:RUBY_EVENT_CALL) + iseq.event(:RUBY_EVENT_CALL) visit(node.bodystmt) - builder.event(:RUBY_EVENT_RETURN) - builder.leave + iseq.event(:RUBY_EVENT_RETURN) + iseq.leave end name = node.name.value.to_sym if node.target visit(node.target) - builder.definesmethod(name, method_iseq) + iseq.definesmethod(name, method_iseq) else - builder.definemethod(name, method_iseq) + iseq.definemethod(name, method_iseq) end - builder.putobject(name) + iseq.putobject(name) end def visit_defined(node) @@ -726,67 +699,67 @@ def visit_defined(node) # If we're assigning to a local variable, then we need to make sure # that we put it into the local table. 
if node.value.target.is_a?(VarField) && - node.value.target.value.is_a?(Ident) - current_iseq.local_table.plain(node.value.target.value.value.to_sym) + node.value.target.value.is_a?(Ident) + iseq.local_table.plain(node.value.target.value.value.to_sym) end - builder.putobject("assignment") + iseq.putobject("assignment") when VarRef value = node.value.value name = value.value.to_sym case value when Const - builder.putnil - builder.defined(YARV::DEFINED_CONST, name, "constant") + iseq.putnil + iseq.defined(YARV::DEFINED_CONST, name, "constant") when CVar - builder.putnil - builder.defined(YARV::DEFINED_CVAR, name, "class variable") + iseq.putnil + iseq.defined(YARV::DEFINED_CVAR, name, "class variable") when GVar - builder.putnil - builder.defined(YARV::DEFINED_GVAR, name, "global-variable") + iseq.putnil + iseq.defined(YARV::DEFINED_GVAR, name, "global-variable") when Ident - builder.putobject("local-variable") + iseq.putobject("local-variable") when IVar - builder.putnil - builder.defined(YARV::DEFINED_IVAR, name, "instance-variable") + iseq.putnil + iseq.defined(YARV::DEFINED_IVAR, name, "instance-variable") when Kw case name when :false - builder.putobject("false") + iseq.putobject("false") when :nil - builder.putobject("nil") + iseq.putobject("nil") when :self - builder.putobject("self") + iseq.putobject("self") when :true - builder.putobject("true") + iseq.putobject("true") end end when VCall - builder.putself + iseq.putself name = node.value.value.value.to_sym - builder.defined(YARV::DEFINED_FUNC, name, "method") + iseq.defined(YARV::DEFINED_FUNC, name, "method") when YieldNode - builder.putnil - builder.defined(YARV::DEFINED_YIELD, false, "yield") + iseq.putnil + iseq.defined(YARV::DEFINED_YIELD, false, "yield") when ZSuper - builder.putnil - builder.defined(YARV::DEFINED_ZSUPER, false, "super") + iseq.putnil + iseq.defined(YARV::DEFINED_ZSUPER, false, "super") else - builder.putobject("expression") + iseq.putobject("expression") end end def 
visit_dyna_symbol(node) if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - builder.putobject(node.parts.first.value.to_sym) + iseq.putobject(node.parts.first.value.to_sym) end end def visit_else(node) visit(node.statements) - builder.pop unless last_statement? + iseq.pop unless last_statement? end def visit_elsif(node) @@ -805,51 +778,50 @@ def visit_field(node) end def visit_float(node) - builder.putobject(node.accept(RubyVisitor.new)) + iseq.putobject(node.accept(RubyVisitor.new)) end def visit_for(node) visit(node.collection) name = node.index.value.value.to_sym - current_iseq.local_table.plain(name) + iseq.local_table.plain(name) block_iseq = with_instruction_sequence( :block, - "block in #{current_iseq.name}", - current_iseq, + "block in #{iseq.name}", node.statements ) do - current_iseq.argument_options[:lead_num] ||= 0 - current_iseq.argument_options[:lead_num] += 1 - current_iseq.argument_options[:ambiguous_param0] = true + iseq.argument_options[:lead_num] ||= 0 + iseq.argument_options[:lead_num] += 1 + iseq.argument_options[:ambiguous_param0] = true - current_iseq.argument_size += 1 - current_iseq.local_table.plain(2) + iseq.argument_size += 1 + iseq.local_table.plain(2) - builder.getlocal(0, 0) + iseq.getlocal(0, 0) - local_variable = current_iseq.local_variable(name) - builder.setlocal(local_variable.index, local_variable.level) + local_variable = iseq.local_variable(name) + iseq.setlocal(local_variable.index, local_variable.level) - builder.event(:RUBY_EVENT_B_CALL) - builder.nop + iseq.event(:RUBY_EVENT_B_CALL) + iseq.nop visit(node.statements) - builder.event(:RUBY_EVENT_B_RETURN) - builder.leave + iseq.event(:RUBY_EVENT_B_RETURN) + iseq.leave end - builder.send(:each, 0, 0, block_iseq) + iseq.send(:each, 0, 0, block_iseq) end def visit_hash(node) if (compiled = RubyVisitor.compile(node)) - builder.duphash(compiled) + iseq.duphash(compiled) else visit_all(node.assocs) - builder.newhash(node.assocs.length * 2) + 
iseq.newhash(node.assocs.length * 2) end end @@ -860,30 +832,30 @@ def visit_heredoc(node) visit(node.parts.first) else length = visit_string_parts(node) - builder.concatstrings(length) + iseq.concatstrings(length) end end def visit_if(node) visit(node.predicate) - branchunless = builder.branchunless(-1) + branchunless = iseq.branchunless(-1) visit(node.statements) if last_statement? - builder.leave - branchunless[1] = builder.label + iseq.leave + branchunless[1] = iseq.label - node.consequent ? visit(node.consequent) : builder.putnil + node.consequent ? visit(node.consequent) : iseq.putnil else - builder.pop + iseq.pop if node.consequent - jump = builder.jump(-1) - branchunless[1] = builder.label + jump = iseq.jump(-1) + branchunless[1] = iseq.label visit(node.consequent) - jump[1] = builder.label + jump[1] = iseq.label else - branchunless[1] = builder.label + branchunless[1] = iseq.label end end end @@ -905,40 +877,35 @@ def visit_if_op(node) end def visit_imaginary(node) - builder.putobject(node.accept(RubyVisitor.new)) + iseq.putobject(node.accept(RubyVisitor.new)) end def visit_int(node) - builder.putobject(node.accept(RubyVisitor.new)) + iseq.putobject(node.accept(RubyVisitor.new)) end def visit_kwrest_param(node) - current_iseq.argument_options[:kwrest] = current_iseq.argument_size - current_iseq.argument_size += 1 - current_iseq.local_table.plain(node.name.value.to_sym) + iseq.argument_options[:kwrest] = iseq.argument_size + iseq.argument_size += 1 + iseq.local_table.plain(node.name.value.to_sym) end def visit_label(node) - builder.putobject(node.accept(RubyVisitor.new)) + iseq.putobject(node.accept(RubyVisitor.new)) end def visit_lambda(node) lambda_iseq = - with_instruction_sequence( - :block, - "block in #{current_iseq.name}", - current_iseq, - node - ) do - builder.event(:RUBY_EVENT_B_CALL) + with_instruction_sequence(:block, "block in #{iseq.name}", node) do + iseq.event(:RUBY_EVENT_B_CALL) visit(node.params) visit(node.statements) - 
builder.event(:RUBY_EVENT_B_RETURN) - builder.leave + iseq.event(:RUBY_EVENT_B_RETURN) + iseq.leave end - builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) - builder.send(:lambda, 0, YARV::VM_CALL_FCALL, lambda_iseq) + iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) + iseq.send(:lambda, 0, YARV::VM_CALL_FCALL, lambda_iseq) end def visit_lambda_var(node) @@ -947,7 +914,7 @@ def visit_lambda_var(node) def visit_massign(node) visit(node.value) - builder.dup + iseq.dup visit(node.target) end @@ -966,7 +933,6 @@ def visit_method_add_block(node) def visit_mlhs(node) lookups = [] - node.parts.each do |part| case part when VarField @@ -974,24 +940,18 @@ def visit_mlhs(node) end end - builder.expandarray(lookups.length, 0) - - lookups.each { |lookup| builder.setlocal(lookup.index, lookup.level) } + iseq.expandarray(lookups.length, 0) + lookups.each { |lookup| iseq.setlocal(lookup.index, lookup.level) } end def visit_module(node) name = node.constant.constant.value.to_sym module_iseq = - with_instruction_sequence( - :class, - "", - current_iseq, - node - ) do - builder.event(:RUBY_EVENT_CLASS) + with_instruction_sequence(:class, "", node) do + iseq.event(:RUBY_EVENT_CLASS) visit(node.bodystmt) - builder.event(:RUBY_EVENT_END) - builder.leave + iseq.event(:RUBY_EVENT_END) + iseq.leave end flags = YARV::VM_DEFINECLASS_TYPE_MODULE @@ -1001,28 +961,28 @@ def visit_module(node) flags |= YARV::VM_DEFINECLASS_FLAG_SCOPED visit(node.constant.parent) when ConstRef - builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) + iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) when TopConstRef flags |= YARV::VM_DEFINECLASS_FLAG_SCOPED - builder.putobject(Object) + iseq.putobject(Object) end - builder.putnil - builder.defineclass(name, module_iseq, flags) + iseq.putnil + iseq.defineclass(name, module_iseq, flags) end def visit_mrhs(node) if (compiled = RubyVisitor.compile(node)) - builder.duparray(compiled) + iseq.duparray(compiled) else visit_all(node.parts) - 
builder.newarray(node.parts.length) + iseq.newarray(node.parts.length) end end def visit_not(node) visit(node.statement) - builder.send(:!, 0, YARV::VM_CALL_ARGS_SIMPLE) + iseq.send(:!, 0, YARV::VM_CALL_ARGS_SIMPLE) end def visit_opassign(node) @@ -1036,31 +996,30 @@ def visit_opassign(node) branchunless = nil with_opassign(node) do - builder.dup - branchunless = builder.branchunless(-1) - builder.pop + iseq.dup + branchunless = iseq.branchunless(-1) + iseq.pop visit(node.value) end case node.target when ARefField - builder.leave - branchunless[1] = builder.label - builder.setn(3) - builder.adjuststack(3) + iseq.leave + branchunless[1] = iseq.label + iseq.setn(3) + iseq.adjuststack(3) when ConstPathField, TopConstField - branchunless[1] = builder.label - builder.swap - builder.pop + branchunless[1] = iseq.label + iseq.swap + iseq.pop else - branchunless[1] = builder.label + branchunless[1] = iseq.label end when :"||" - if node.target.is_a?(ConstPathField) || - node.target.is_a?(TopConstField) + if node.target.is_a?(ConstPathField) || node.target.is_a?(TopConstField) opassign_defined(node) - builder.swap - builder.pop + iseq.swap + iseq.pop elsif node.target.is_a?(VarField) && [Const, CVar, GVar].include?(node.target.value.class) opassign_defined(node) @@ -1068,67 +1027,65 @@ def visit_opassign(node) branchif = nil with_opassign(node) do - builder.dup - branchif = builder.branchif(-1) - builder.pop + iseq.dup + branchif = iseq.branchif(-1) + iseq.pop visit(node.value) end if node.target.is_a?(ARefField) - builder.leave - branchif[1] = builder.label - builder.setn(3) - builder.adjuststack(3) + iseq.leave + branchif[1] = iseq.label + iseq.setn(3) + iseq.adjuststack(3) else - branchif[1] = builder.label + branchif[1] = iseq.label end end else with_opassign(node) do visit(node.value) - builder.send(operator, 1, flag) + iseq.send(operator, 1, flag) end end end def visit_params(node) - argument_options = current_iseq.argument_options + argument_options = 
iseq.argument_options if node.requireds.any? argument_options[:lead_num] = 0 node.requireds.each do |required| - current_iseq.local_table.plain(required.value.to_sym) - current_iseq.argument_size += 1 + iseq.local_table.plain(required.value.to_sym) + iseq.argument_size += 1 argument_options[:lead_num] += 1 end end node.optionals.each do |(optional, value)| - index = current_iseq.local_table.size + index = iseq.local_table.size name = optional.value.to_sym - current_iseq.local_table.plain(name) - current_iseq.argument_size += 1 + iseq.local_table.plain(name) + iseq.argument_size += 1 - unless argument_options.key?(:opt) - argument_options[:opt] = [builder.label] - end + argument_options[:opt] = [iseq.label] unless argument_options.key?(:opt) visit(value) - builder.setlocal(index, 0) - current_iseq.argument_options[:opt] << builder.label + iseq.setlocal(index, 0) + iseq.argument_options[:opt] << iseq.label end visit(node.rest) if node.rest if node.posts.any? - argument_options[:post_start] = current_iseq.argument_size + argument_options[:post_start] = iseq.argument_size argument_options[:post_num] = 0 node.posts.each do |post| - current_iseq.local_table.plain(post.value.to_sym) - current_iseq.argument_size += 1 + iseq.local_table.plain(post.value.to_sym) + iseq.argument_size += 1 argument_options[:post_num] += 1 end end @@ -1140,10 +1097,10 @@ def visit_params(node) node.keywords.each_with_index do |(keyword, value), keyword_index| name = keyword.value.chomp(":").to_sym - index = current_iseq.local_table.size + index = iseq.local_table.size - current_iseq.local_table.plain(name) - current_iseq.argument_size += 1 + iseq.local_table.plain(name) + iseq.argument_size += 1 argument_options[:kwbits] += 1 if value.nil? 
@@ -1153,34 +1110,30 @@ def visit_params(node) argument_options[:keyword] << [name, compiled] else argument_options[:keyword] << [name] - checkkeywords << builder.checkkeyword(-1, keyword_index) - branchif = builder.branchif(-1) + checkkeywords << iseq.checkkeyword(-1, keyword_index) + branchif = iseq.branchif(-1) visit(value) - builder.setlocal(index, 0) - branchif[1] = builder.label + iseq.setlocal(index, 0) + branchif[1] = iseq.label end end name = node.keyword_rest ? 3 : 2 - current_iseq.argument_size += 1 - current_iseq.local_table.plain(name) + iseq.argument_size += 1 + iseq.local_table.plain(name) - lookup = current_iseq.local_table.find(name, 0) + lookup = iseq.local_table.find(name, 0) checkkeywords.each { |checkkeyword| checkkeyword[1] = lookup.index } end if node.keyword_rest.is_a?(ArgsForward) - current_iseq.local_table.plain(:*) - current_iseq.local_table.plain(:&) + iseq.local_table.plain(:*) + iseq.local_table.plain(:&) - current_iseq.argument_options[ - :rest_start - ] = current_iseq.argument_size - current_iseq.argument_options[ - :block_start - ] = current_iseq.argument_size + 1 + iseq.argument_options[:rest_start] = iseq.argument_size + iseq.argument_options[:block_start] = iseq.argument_size + 1 - current_iseq.argument_size += 2 + iseq.argument_size += 2 elsif node.keyword_rest visit(node.keyword_rest) end @@ -1215,82 +1168,77 @@ def visit_program(node) end end - with_instruction_sequence(:top, "", nil, node) do + with_instruction_sequence(:top, "", node) do visit_all(preexes) if statements.empty? 
- builder.putnil + iseq.putnil else *statements, last_statement = statements visit_all(statements) with_last_statement { visit(last_statement) } end - builder.leave + iseq.leave end end def visit_qsymbols(node) - builder.duparray(node.accept(RubyVisitor.new)) + iseq.duparray(node.accept(RubyVisitor.new)) end def visit_qwords(node) if frozen_string_literal - builder.duparray(node.accept(RubyVisitor.new)) + iseq.duparray(node.accept(RubyVisitor.new)) else visit_all(node.elements) - builder.newarray(node.elements.length) + iseq.newarray(node.elements.length) end end def visit_range(node) if (compiled = RubyVisitor.compile(node)) - builder.putobject(compiled) + iseq.putobject(compiled) else visit(node.left) visit(node.right) - builder.newrange(node.operator.value == ".." ? 0 : 1) + iseq.newrange(node.operator.value == ".." ? 0 : 1) end end def visit_rational(node) - builder.putobject(node.accept(RubyVisitor.new)) + iseq.putobject(node.accept(RubyVisitor.new)) end def visit_regexp_literal(node) if (compiled = RubyVisitor.compile(node)) - builder.putobject(compiled) + iseq.putobject(compiled) else flags = RubyVisitor.new.visit_regexp_literal_flags(node) length = visit_string_parts(node) - builder.toregexp(flags, length) + iseq.toregexp(flags, length) end end def visit_rest_param(node) - current_iseq.local_table.plain(node.name.value.to_sym) - current_iseq.argument_options[:rest_start] = current_iseq.argument_size - current_iseq.argument_size += 1 + iseq.local_table.plain(node.name.value.to_sym) + iseq.argument_options[:rest_start] = iseq.argument_size + iseq.argument_size += 1 end def visit_sclass(node) visit(node.target) - builder.putnil + iseq.putnil singleton_iseq = - with_instruction_sequence( - :class, - "singleton class", - current_iseq, - node - ) do - builder.event(:RUBY_EVENT_CLASS) + with_instruction_sequence(:class, "singleton class", node) do + iseq.event(:RUBY_EVENT_CLASS) visit(node.bodystmt) - builder.event(:RUBY_EVENT_END) - builder.leave + 
iseq.event(:RUBY_EVENT_END) + iseq.leave end - builder.defineclass( + iseq.defineclass( :singletonclass, singleton_iseq, YARV::VM_DEFINECLASS_TYPE_SINGLETON_CLASS @@ -1308,20 +1256,19 @@ def visit_statements(node) end end - statements.empty? ? builder.putnil : visit_all(statements) + statements.empty? ? iseq.putnil : visit_all(statements) end def visit_string_concat(node) value = node.left.parts.first.value + node.right.parts.first.value - content = TStringContent.new(value: value, location: node.location) - literal = + visit_string_literal( StringLiteral.new( - parts: [content], + parts: [TStringContent.new(value: value, location: node.location)], quote: node.left.quote, location: node.location ) - visit_string_literal(literal) + ) end def visit_string_embexpr(node) @@ -1333,14 +1280,14 @@ def visit_string_literal(node) visit(node.parts.first) else length = visit_string_parts(node) - builder.concatstrings(length) + iseq.concatstrings(length) end end def visit_super(node) - builder.putself + iseq.putself visit(node.arguments) - builder.invokesuper( + iseq.invokesuper( nil, argument_parts(node.arguments).length, YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE | YARV::VM_CALL_SUPER, @@ -1349,37 +1296,37 @@ def visit_super(node) end def visit_symbol_literal(node) - builder.putobject(node.accept(RubyVisitor.new)) + iseq.putobject(node.accept(RubyVisitor.new)) end def visit_symbols(node) if (compiled = RubyVisitor.compile(node)) - builder.duparray(compiled) + iseq.duparray(compiled) else node.elements.each do |element| if element.parts.length == 1 && - element.parts.first.is_a?(TStringContent) - builder.putobject(element.parts.first.value.to_sym) + element.parts.first.is_a?(TStringContent) + iseq.putobject(element.parts.first.value.to_sym) else length = visit_string_parts(element) - builder.concatstrings(length) - builder.intern + iseq.concatstrings(length) + iseq.intern end end - builder.newarray(node.elements.length) + iseq.newarray(node.elements.length) end end def 
visit_top_const_ref(node) - builder.opt_getconstant_path(constant_names(node)) + iseq.opt_getconstant_path(constant_names(node)) end def visit_tstring_content(node) if frozen_string_literal - builder.putobject(node.accept(RubyVisitor.new)) + iseq.putobject(node.accept(RubyVisitor.new)) else - builder.putstring(node.accept(RubyVisitor.new)) + iseq.putstring(node.accept(RubyVisitor.new)) end end @@ -1406,34 +1353,34 @@ def visit_unary(node) def visit_undef(node) node.symbols.each_with_index do |symbol, index| - builder.pop if index != 0 - builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) - builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_CBASE) + iseq.pop if index != 0 + iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) + iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CBASE) visit(symbol) - builder.send(:"core#undef_method", 2, YARV::VM_CALL_ARGS_SIMPLE) + iseq.send(:"core#undef_method", 2, YARV::VM_CALL_ARGS_SIMPLE) end end def visit_unless(node) visit(node.predicate) - branchunless = builder.branchunless(-1) - node.consequent ? visit(node.consequent) : builder.putnil + branchunless = iseq.branchunless(-1) + node.consequent ? visit(node.consequent) : iseq.putnil if last_statement? 
- builder.leave - branchunless[1] = builder.label + iseq.leave + branchunless[1] = iseq.label visit(node.statements) else - builder.pop + iseq.pop if node.consequent - jump = builder.jump(-1) - branchunless[1] = builder.label + jump = iseq.jump(-1) + branchunless[1] = iseq.label visit(node.consequent) - jump[1] = builder.label + jump[1] = iseq.label else - branchunless[1] = builder.label + branchunless[1] = iseq.label end end end @@ -1441,34 +1388,34 @@ def visit_unless(node) def visit_until(node) jumps = [] - jumps << builder.jump(-1) - builder.putnil - builder.pop - jumps << builder.jump(-1) + jumps << iseq.jump(-1) + iseq.putnil + iseq.pop + jumps << iseq.jump(-1) - label = builder.label + label = iseq.label visit(node.statements) - builder.pop - jumps.each { |jump| jump[1] = builder.label } + iseq.pop + jumps.each { |jump| jump[1] = iseq.label } visit(node.predicate) - builder.branchunless(label) - builder.putnil if last_statement? + iseq.branchunless(label) + iseq.putnil if last_statement? 
end def visit_var_field(node) case node.value when CVar, IVar name = node.value.value.to_sym - current_iseq.inline_storage_for(name) + iseq.inline_storage_for(name) when Ident name = node.value.value.to_sym - if (local_variable = current_iseq.local_variable(name)) + if (local_variable = iseq.local_variable(name)) local_variable else - current_iseq.local_table.plain(name) - current_iseq.local_variable(name) + iseq.local_table.plain(name) + iseq.local_variable(name) end end end @@ -1476,43 +1423,44 @@ def visit_var_field(node) def visit_var_ref(node) case node.value when Const - builder.opt_getconstant_path(constant_names(node)) + iseq.opt_getconstant_path(constant_names(node)) when CVar name = node.value.value.to_sym - builder.getclassvariable(name) + iseq.getclassvariable(name) when GVar - builder.getglobal(node.value.value.to_sym) + iseq.getglobal(node.value.value.to_sym) when Ident - lookup = current_iseq.local_variable(node.value.value.to_sym) + lookup = iseq.local_variable(node.value.value.to_sym) case lookup.local when YARV::LocalTable::BlockLocal - builder.getblockparam(lookup.index, lookup.level) + iseq.getblockparam(lookup.index, lookup.level) when YARV::LocalTable::PlainLocal - builder.getlocal(lookup.index, lookup.level) + iseq.getlocal(lookup.index, lookup.level) end when IVar name = node.value.value.to_sym - builder.getinstancevariable(name) + iseq.getinstancevariable(name) when Kw case node.value.value when "false" - builder.putobject(false) + iseq.putobject(false) when "nil" - builder.putnil + iseq.putnil when "self" - builder.putself + iseq.putself when "true" - builder.putobject(true) + iseq.putobject(true) end end end def visit_vcall(node) - builder.putself + iseq.putself - flag = YARV::VM_CALL_FCALL | YARV::VM_CALL_VCALL | YARV::VM_CALL_ARGS_SIMPLE - builder.send(node.value.value.to_sym, 0, flag) + flag = + YARV::VM_CALL_FCALL | YARV::VM_CALL_VCALL | YARV::VM_CALL_ARGS_SIMPLE + iseq.send(node.value.value.to_sym, 0, flag) end def visit_when(node) 
@@ -1522,19 +1470,19 @@ def visit_when(node) def visit_while(node) jumps = [] - jumps << builder.jump(-1) - builder.putnil - builder.pop - jumps << builder.jump(-1) + jumps << iseq.jump(-1) + iseq.putnil + iseq.pop + jumps << iseq.jump(-1) - label = builder.label + label = iseq.label visit(node.statements) - builder.pop - jumps.each { |jump| jump[1] = builder.label } + iseq.pop + jumps.each { |jump| jump[1] = iseq.label } visit(node.predicate) - builder.branchif(label) - builder.putnil if last_statement? + iseq.branchif(label) + iseq.putnil if last_statement? end def visit_word(node) @@ -1542,38 +1490,39 @@ def visit_word(node) visit(node.parts.first) else length = visit_string_parts(node) - builder.concatstrings(length) + iseq.concatstrings(length) end end def visit_words(node) if frozen_string_literal && (compiled = RubyVisitor.compile(node)) - builder.duparray(compiled) + iseq.duparray(compiled) else visit_all(node.elements) - builder.newarray(node.elements.length) + iseq.newarray(node.elements.length) end end def visit_xstring_literal(node) - builder.putself + iseq.putself length = visit_string_parts(node) - builder.concatstrings(node.parts.length) if length > 1 - builder.send(:`, 1, YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE) + iseq.concatstrings(node.parts.length) if length > 1 + iseq.send(:`, 1, YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE) end def visit_yield(node) parts = argument_parts(node.arguments) visit_all(parts) - builder.invokeblock(nil, parts.length, YARV::VM_CALL_ARGS_SIMPLE) + iseq.invokeblock(nil, parts.length, YARV::VM_CALL_ARGS_SIMPLE) end def visit_zsuper(_node) - builder.putself - builder.invokesuper( + iseq.putself + iseq.invokesuper( nil, 0, - YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE | YARV::VM_CALL_SUPER | YARV::VM_CALL_ZSUPER, + YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE | YARV::VM_CALL_SUPER | + YARV::VM_CALL_ZSUPER, nil ) end @@ -1638,81 +1587,85 @@ def opassign_defined(node) visit(node.target.parent) name = 
node.target.constant.value.to_sym - builder.dup - builder.defined(YARV::DEFINED_CONST_FROM, name, true) + iseq.dup + iseq.defined(YARV::DEFINED_CONST_FROM, name, true) when TopConstField name = node.target.constant.value.to_sym - builder.putobject(Object) - builder.dup - builder.defined(YARV::DEFINED_CONST_FROM, name, true) + iseq.putobject(Object) + iseq.dup + iseq.defined(YARV::DEFINED_CONST_FROM, name, true) when VarField name = node.target.value.value.to_sym - builder.putnil + iseq.putnil case node.target.value when Const - builder.defined(YARV::DEFINED_CONST, name, true) + iseq.defined(YARV::DEFINED_CONST, name, true) when CVar - builder.defined(YARV::DEFINED_CVAR, name, true) + iseq.defined(YARV::DEFINED_CVAR, name, true) when GVar - builder.defined(YARV::DEFINED_GVAR, name, true) + iseq.defined(YARV::DEFINED_GVAR, name, true) end end - branchunless = builder.branchunless(-1) + branchunless = iseq.branchunless(-1) case node.target when ConstPathField, TopConstField - builder.dup - builder.putobject(true) - builder.getconstant(name) + iseq.dup + iseq.putobject(true) + iseq.getconstant(name) when VarField case node.target.value when Const - builder.opt_getconstant_path(constant_names(node.target)) + iseq.opt_getconstant_path(constant_names(node.target)) when CVar - builder.getclassvariable(name) + iseq.getclassvariable(name) when GVar - builder.getglobal(name) + iseq.getglobal(name) end end - builder.dup - branchif = builder.branchif(-1) - builder.pop + iseq.dup + branchif = iseq.branchif(-1) + iseq.pop - branchunless[1] = builder.label + branchunless[1] = iseq.label visit(node.value) case node.target when ConstPathField, TopConstField - builder.dupn(2) - builder.swap - builder.setconstant(name) + iseq.dupn(2) + iseq.swap + iseq.setconstant(name) when VarField - builder.dup + iseq.dup case node.target.value when Const - builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) - builder.setconstant(name) + 
iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) + iseq.setconstant(name) when CVar - builder.setclassvariable(name) + iseq.setclassvariable(name) when GVar - builder.setglobal(name) + iseq.setglobal(name) end end - branchif[1] = builder.label + branchif[1] = iseq.label end # Whenever a value is interpolated into a string-like structure, these # three instructions are pushed. def push_interpolate - builder.dup - builder.objtostring(:to_s, 0, YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE) - builder.anytostring + iseq.dup + iseq.objtostring( + :to_s, + 0, + YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE + ) + iseq.anytostring end # There are a lot of nodes in the AST that act as contains of parts of @@ -1723,7 +1676,7 @@ def visit_string_parts(node) length = 0 unless node.parts.first.is_a?(TStringContent) - builder.putobject("") + iseq.putobject("") length += 1 end @@ -1736,7 +1689,7 @@ def visit_string_parts(node) visit(part) push_interpolate when TStringContent - builder.putobject(part.accept(RubyVisitor.new)) + iseq.putobject(part.accept(RubyVisitor.new)) end length += 1 @@ -1749,27 +1702,26 @@ def visit_string_parts(node) # on the compiler. When we descend into a node that has its own # instruction sequence, this method can be called to temporarily set the # new value of the instruction sequence, yield, and then set it back. 
- def with_instruction_sequence(type, name, parent_iseq, node) - previous_iseq = current_iseq - previous_builder = builder + def with_instruction_sequence(type, name, node) + parent_iseq = iseq begin - iseq = YARV::InstructionSequence.new(type, name, parent_iseq, node.location) - - @current_iseq = iseq - @builder = - YARV::Builder.new( - iseq, + iseq = + YARV::InstructionSequence.new( + type, + name, + parent_iseq, + node.location, frozen_string_literal: frozen_string_literal, operands_unification: operands_unification, specialized_instruction: specialized_instruction ) + @iseq = iseq yield iseq ensure - @current_iseq = previous_iseq - @builder = previous_builder + @iseq = parent_iseq end end @@ -1803,99 +1755,99 @@ def last_statement? def with_opassign(node) case node.target when ARefField - builder.putnil + iseq.putnil visit(node.target.collection) visit(node.target.index) - builder.dupn(2) - builder.send(:[], 1, YARV::VM_CALL_ARGS_SIMPLE) + iseq.dupn(2) + iseq.send(:[], 1, YARV::VM_CALL_ARGS_SIMPLE) yield - builder.setn(3) - builder.send(:[]=, 2, YARV::VM_CALL_ARGS_SIMPLE) - builder.pop + iseq.setn(3) + iseq.send(:[]=, 2, YARV::VM_CALL_ARGS_SIMPLE) + iseq.pop when ConstPathField name = node.target.constant.value.to_sym visit(node.target.parent) - builder.dup - builder.putobject(true) - builder.getconstant(name) + iseq.dup + iseq.putobject(true) + iseq.getconstant(name) yield if node.operator.value == "&&=" - builder.dupn(2) + iseq.dupn(2) else - builder.swap - builder.topn(1) + iseq.swap + iseq.topn(1) end - builder.swap - builder.setconstant(name) + iseq.swap + iseq.setconstant(name) when TopConstField name = node.target.constant.value.to_sym - builder.putobject(Object) - builder.dup - builder.putobject(true) - builder.getconstant(name) + iseq.putobject(Object) + iseq.dup + iseq.putobject(true) + iseq.getconstant(name) yield if node.operator.value == "&&=" - builder.dupn(2) + iseq.dupn(2) else - builder.swap - builder.topn(1) + iseq.swap + iseq.topn(1) end - 
builder.swap - builder.setconstant(name) + iseq.swap + iseq.setconstant(name) when VarField case node.target.value when Const names = constant_names(node.target) - builder.opt_getconstant_path(names) + iseq.opt_getconstant_path(names) yield - builder.dup - builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) - builder.setconstant(names.last) + iseq.dup + iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) + iseq.setconstant(names.last) when CVar name = node.target.value.value.to_sym - builder.getclassvariable(name) + iseq.getclassvariable(name) yield - builder.dup - builder.setclassvariable(name) + iseq.dup + iseq.setclassvariable(name) when GVar name = node.target.value.value.to_sym - builder.getglobal(name) + iseq.getglobal(name) yield - builder.dup - builder.setglobal(name) + iseq.dup + iseq.setglobal(name) when Ident local_variable = visit(node.target) - builder.getlocal(local_variable.index, local_variable.level) + iseq.getlocal(local_variable.index, local_variable.level) yield - builder.dup - builder.setlocal(local_variable.index, local_variable.level) + iseq.dup + iseq.setlocal(local_variable.index, local_variable.level) when IVar name = node.target.value.value.to_sym - builder.getinstancevariable(name) + iseq.getinstancevariable(name) yield - builder.dup - builder.setinstancevariable(name) + iseq.dup + iseq.setinstancevariable(name) end end end diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index 7290d87f..b6c3468c 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -147,7 +147,20 @@ class InstructionSequence # maximum size of the stack for this instruction sequence. attr_reader :stack - def initialize(type, name, parent_iseq, location) + # These are various compilation options provided. 
+ attr_reader :frozen_string_literal, + :operands_unification, + :specialized_instruction + + def initialize( + type, + name, + parent_iseq, + location, + frozen_string_literal: false, + operands_unification: true, + specialized_instruction: true + ) @type = type @name = name @parent_iseq = parent_iseq @@ -161,8 +174,16 @@ def initialize(type, name, parent_iseq, location) @insns = [] @storage_index = 0 @stack = Stack.new + + @frozen_string_literal = frozen_string_literal + @operands_unification = operands_unification + @specialized_instruction = specialized_instruction end + ########################################################################## + # Query methods + ########################################################################## + def local_variable(name, level = 0) if (lookup = local_table.find(name, level)) lookup @@ -171,11 +192,6 @@ def local_variable(name, level = 0) end end - def push(insn) - insns << insn - insn - end - def inline_storage storage = storage_index @storage_index += 1 @@ -183,9 +199,7 @@ def inline_storage end def inline_storage_for(name) - unless inline_storages.key?(name) - inline_storages[name] = inline_storage - end + inline_storages[name] = inline_storage unless inline_storages.key?(name) inline_storages[name] end @@ -239,251 +253,149 @@ def to_a ] end - private - - def serialize(insn) - case insn[0] - when :checkkeyword, :getblockparam, :getblockparamproxy, - :getlocal_WC_0, :getlocal_WC_1, :getlocal, :setlocal_WC_0, - :setlocal_WC_1, :setlocal - iseq = self - - case insn[0] - when :getlocal_WC_1, :setlocal_WC_1 - iseq = iseq.parent_iseq - when :getblockparam, :getblockparamproxy, :getlocal, :setlocal - insn[2].times { iseq = iseq.parent_iseq } - end - - # Here we need to map the local variable index to the offset - # from the top of the stack where it will be stored. 
- [insn[0], iseq.local_table.offset(insn[1]), *insn[2..]] - when :defineclass - [insn[0], insn[1], insn[2].to_a, insn[3]] - when :definemethod, :definesmethod - [insn[0], insn[1], insn[2].to_a] - when :send - # For any instructions that push instruction sequences onto the - # stack, we need to call #to_a on them as well. - [insn[0], insn[1], (insn[2].to_a if insn[2])] - when :once - [insn[0], insn[1].to_a, insn[2]] - else - insn - end - end - end - - # This class is responsible for taking a compiled instruction sequence and - # walking through it to generate equivalent Ruby code. - class Disassembler - attr_reader :iseq - - def initialize(iseq) - @iseq = iseq - end - - def to_ruby - stack = [] - - iseq.insns.each do |insn| - case insn[0] - when :getlocal_WC_0 - value = iseq.local_table.locals[insn[1]].name.to_s - stack << VarRef.new(value: Ident.new(value: value, location: Location.default), location: Location.default) - when :leave - stack << ReturnNode.new(arguments: Args.new(parts: [stack.pop], location: Location.default), location: Location.default) - when :opt_mult - left, right = stack.pop(2) - stack << Binary.new(left: left, operator: :*, right: right, location: Location.default) - when :opt_plus - left, right = stack.pop(2) - stack << Binary.new(left: left, operator: :+, right: right, location: Location.default) - when :putobject - case insn[1] - when Float - stack << FloatLiteral.new(value: insn[1].inspect, location: Location.default) - when Integer - stack << Int.new(value: insn[1].inspect, location: Location.default) - when Rational - stack << RationalLiteral.new(value: insn[1].inspect, location: Location.default) - else - raise "Unknown object type: #{insn[1].class.name}" - end - when :putobject_INT2FIX_1_ - stack << Int.new(value: "1", location: Location.default) - when :setlocal_WC_0 - target = VarField.new(value: Ident.new(value: iseq.local_table.locals[insn[1]].name.to_s, location: Location.default), location: Location.default) - stack << 
Assign.new(target: target, value: stack.pop, location: Location.default) - else - raise "Unknown instruction #{insn[0]}" - end - end - - Statements.new(nil, body: stack, location: Location.default) - end - end - - # This class serves as a layer of indirection between the instruction - # sequence and the compiler. It allows us to provide different behavior - # for certain instructions depending on the Ruby version. For example, - # class variable reads and writes gained an inline cache in Ruby 3.0. So - # we place the logic for checking the Ruby version in this class. - class Builder - attr_reader :iseq, :stack - attr_reader :frozen_string_literal, - :operands_unification, - :specialized_instruction + ########################################################################## + # Instruction push methods + ########################################################################## - def initialize( - iseq, - frozen_string_literal: false, - operands_unification: true, - specialized_instruction: true - ) - @iseq = iseq - @stack = iseq.stack - - @frozen_string_literal = frozen_string_literal - @operands_unification = operands_unification - @specialized_instruction = specialized_instruction + def push(insn) + insns << insn + insn end # This creates a new label at the current length of the instruction # sequence. It is used as the operand for jump instructions. def label - name = :"label_#{iseq.length}" - iseq.insns.last == name ? name : event(name) + name = :"label_#{length}" + insns.last == name ? 
name : event(name) end def event(name) - iseq.push(name) - name + push(name) end def adjuststack(number) stack.change_by(-number) - iseq.push([:adjuststack, number]) + push([:adjuststack, number]) end def anytostring stack.change_by(-2 + 1) - iseq.push([:anytostring]) + push([:anytostring]) end def branchif(index) stack.change_by(-1) - iseq.push([:branchif, index]) + push([:branchif, index]) end def branchnil(index) stack.change_by(-1) - iseq.push([:branchnil, index]) + push([:branchnil, index]) end def branchunless(index) stack.change_by(-1) - iseq.push([:branchunless, index]) + push([:branchunless, index]) end def checkkeyword(index, keyword_index) stack.change_by(+1) - iseq.push([:checkkeyword, index, keyword_index]) + push([:checkkeyword, index, keyword_index]) end def concatarray stack.change_by(-2 + 1) - iseq.push([:concatarray]) + push([:concatarray]) end def concatstrings(number) stack.change_by(-number + 1) - iseq.push([:concatstrings, number]) + push([:concatstrings, number]) end def defined(type, name, message) stack.change_by(-1 + 1) - iseq.push([:defined, type, name, message]) + push([:defined, type, name, message]) end def defineclass(name, class_iseq, flags) stack.change_by(-2 + 1) - iseq.push([:defineclass, name, class_iseq, flags]) + push([:defineclass, name, class_iseq, flags]) end def definemethod(name, method_iseq) stack.change_by(0) - iseq.push([:definemethod, name, method_iseq]) + push([:definemethod, name, method_iseq]) end def definesmethod(name, method_iseq) stack.change_by(-1) - iseq.push([:definesmethod, name, method_iseq]) + push([:definesmethod, name, method_iseq]) end def dup stack.change_by(-1 + 2) - iseq.push([:dup]) + push([:dup]) end def duparray(object) stack.change_by(+1) - iseq.push([:duparray, object]) + push([:duparray, object]) end def duphash(object) stack.change_by(+1) - iseq.push([:duphash, object]) + push([:duphash, object]) end def dupn(number) stack.change_by(+number) - iseq.push([:dupn, number]) + push([:dupn, number]) 
end def expandarray(length, flag) stack.change_by(-1 + length) - iseq.push([:expandarray, length, flag]) + push([:expandarray, length, flag]) end def getblockparam(index, level) stack.change_by(+1) - iseq.push([:getblockparam, index, level]) + push([:getblockparam, index, level]) end def getblockparamproxy(index, level) stack.change_by(+1) - iseq.push([:getblockparamproxy, index, level]) + push([:getblockparamproxy, index, level]) end def getclassvariable(name) stack.change_by(+1) if RUBY_VERSION >= "3.0" - iseq.push([:getclassvariable, name, iseq.inline_storage_for(name)]) + push([:getclassvariable, name, inline_storage_for(name)]) else - iseq.push([:getclassvariable, name]) + push([:getclassvariable, name]) end end def getconstant(name) stack.change_by(-2 + 1) - iseq.push([:getconstant, name]) + push([:getconstant, name]) end def getglobal(name) stack.change_by(+1) - iseq.push([:getglobal, name]) + push([:getglobal, name]) end def getinstancevariable(name) stack.change_by(+1) if RUBY_VERSION >= "3.2" - iseq.push([:getinstancevariable, name, iseq.inline_storage]) + push([:getinstancevariable, name, inline_storage]) else - inline_storage = iseq.inline_storage_for(name) - iseq.push([:getinstancevariable, name, inline_storage]) + inline_storage = inline_storage_for(name) + push([:getinstancevariable, name, inline_storage]) end end @@ -497,86 +409,86 @@ def getlocal(index, level) # scope, respectively, and requires fewer operands. 
case level when 0 - iseq.push([:getlocal_WC_0, index]) + push([:getlocal_WC_0, index]) when 1 - iseq.push([:getlocal_WC_1, index]) + push([:getlocal_WC_1, index]) else - iseq.push([:getlocal, index, level]) + push([:getlocal, index, level]) end else - iseq.push([:getlocal, index, level]) + push([:getlocal, index, level]) end end def getspecial(key, type) stack.change_by(-0 + 1) - iseq.push([:getspecial, key, type]) + push([:getspecial, key, type]) end def intern stack.change_by(-1 + 1) - iseq.push([:intern]) + push([:intern]) end def invokeblock(method_id, argc, flag) stack.change_by(-argc + 1) - iseq.push([:invokeblock, call_data(method_id, argc, flag)]) + push([:invokeblock, call_data(method_id, argc, flag)]) end def invokesuper(method_id, argc, flag, block_iseq) stack.change_by(-(argc + 1) + 1) cdata = call_data(method_id, argc, flag) - iseq.push([:invokesuper, cdata, block_iseq]) + push([:invokesuper, cdata, block_iseq]) end def jump(index) stack.change_by(0) - iseq.push([:jump, index]) + push([:jump, index]) end def leave stack.change_by(-1) - iseq.push([:leave]) + push([:leave]) end def newarray(length) stack.change_by(-length + 1) - iseq.push([:newarray, length]) + push([:newarray, length]) end def newhash(length) stack.change_by(-length + 1) - iseq.push([:newhash, length]) + push([:newhash, length]) end def newrange(flag) stack.change_by(-2 + 1) - iseq.push([:newrange, flag]) + push([:newrange, flag]) end def nop stack.change_by(0) - iseq.push([:nop]) + push([:nop]) end def objtostring(method_id, argc, flag) stack.change_by(-1 + 1) - iseq.push([:objtostring, call_data(method_id, argc, flag)]) + push([:objtostring, call_data(method_id, argc, flag)]) end def once(postexe_iseq, inline_storage) stack.change_by(+1) - iseq.push([:once, postexe_iseq, inline_storage]) + push([:once, postexe_iseq, inline_storage]) end def opt_getconstant_path(names) if RUBY_VERSION >= "3.2" stack.change_by(+1) - iseq.push([:opt_getconstant_path, names]) + 
push([:opt_getconstant_path, names]) else - inline_storage = iseq.inline_storage - getinlinecache = opt_getinlinecache(-1, inline_storage) + const_inline_storage = inline_storage + getinlinecache = opt_getinlinecache(-1, const_inline_storage) if names[0] == :"" names.shift @@ -589,20 +501,20 @@ def opt_getconstant_path(names) getconstant(name) end - opt_setinlinecache(inline_storage) + opt_setinlinecache(const_inline_storage) getinlinecache[1] = label end end def opt_getinlinecache(offset, inline_storage) stack.change_by(+1) - iseq.push([:opt_getinlinecache, offset, inline_storage]) + push([:opt_getinlinecache, offset, inline_storage]) end def opt_newarray_max(length) if specialized_instruction stack.change_by(-length + 1) - iseq.push([:opt_newarray_max, length]) + push([:opt_newarray_max, length]) else newarray(length) send(:max, 0, VM_CALL_ARGS_SIMPLE) @@ -612,7 +524,7 @@ def opt_newarray_max(length) def opt_newarray_min(length) if specialized_instruction stack.change_by(-length + 1) - iseq.push([:opt_newarray_min, length]) + push([:opt_newarray_min, length]) else newarray(length) send(:min, 0, VM_CALL_ARGS_SIMPLE) @@ -621,18 +533,14 @@ def opt_newarray_min(length) def opt_setinlinecache(inline_storage) stack.change_by(-1 + 1) - iseq.push([:opt_setinlinecache, inline_storage]) + push([:opt_setinlinecache, inline_storage]) end def opt_str_freeze(value) if specialized_instruction stack.change_by(+1) - iseq.push( - [ - :opt_str_freeze, - value, - call_data(:freeze, 0, VM_CALL_ARGS_SIMPLE) - ] + push( + [:opt_str_freeze, value, call_data(:freeze, 0, VM_CALL_ARGS_SIMPLE)] ) else putstring(value) @@ -643,9 +551,7 @@ def opt_str_freeze(value) def opt_str_uminus(value) if specialized_instruction stack.change_by(+1) - iseq.push( - [:opt_str_uminus, value, call_data(:-@, 0, VM_CALL_ARGS_SIMPLE)] - ) + push([:opt_str_uminus, value, call_data(:-@, 0, VM_CALL_ARGS_SIMPLE)]) else putstring(value) send(:-@, 0, VM_CALL_ARGS_SIMPLE) @@ -654,12 +560,12 @@ def opt_str_uminus(value) 
def pop stack.change_by(-1) - iseq.push([:pop]) + push([:pop]) end def putnil stack.change_by(+1) - iseq.push([:putnil]) + push([:putnil]) end def putobject(object) @@ -671,30 +577,30 @@ def putobject(object) # that will push the object onto the stack and requires fewer # operands. if object.eql?(0) - iseq.push([:putobject_INT2FIX_0_]) + push([:putobject_INT2FIX_0_]) elsif object.eql?(1) - iseq.push([:putobject_INT2FIX_1_]) + push([:putobject_INT2FIX_1_]) else - iseq.push([:putobject, object]) + push([:putobject, object]) end else - iseq.push([:putobject, object]) + push([:putobject, object]) end end def putself stack.change_by(+1) - iseq.push([:putself]) + push([:putself]) end def putspecialobject(object) stack.change_by(+1) - iseq.push([:putspecialobject, object]) + push([:putspecialobject, object]) end def putstring(object) stack.change_by(+1) - iseq.push([:putstring, object]) + push([:putstring, object]) end def send(method_id, argc, flag, block_iseq = nil) @@ -710,39 +616,39 @@ def send(method_id, argc, flag, block_iseq = nil) # stree-ignore if !block_iseq && (flag & VM_CALL_ARGS_BLOCKARG) == 0 case [method_id, argc] - when [:length, 0] then iseq.push([:opt_length, cdata]) - when [:size, 0] then iseq.push([:opt_size, cdata]) - when [:empty?, 0] then iseq.push([:opt_empty_p, cdata]) - when [:nil?, 0] then iseq.push([:opt_nil_p, cdata]) - when [:succ, 0] then iseq.push([:opt_succ, cdata]) - when [:!, 0] then iseq.push([:opt_not, cdata]) - when [:+, 1] then iseq.push([:opt_plus, cdata]) - when [:-, 1] then iseq.push([:opt_minus, cdata]) - when [:*, 1] then iseq.push([:opt_mult, cdata]) - when [:/, 1] then iseq.push([:opt_div, cdata]) - when [:%, 1] then iseq.push([:opt_mod, cdata]) - when [:==, 1] then iseq.push([:opt_eq, cdata]) - when [:=~, 1] then iseq.push([:opt_regexpmatch2, cdata]) - when [:<, 1] then iseq.push([:opt_lt, cdata]) - when [:<=, 1] then iseq.push([:opt_le, cdata]) - when [:>, 1] then iseq.push([:opt_gt, cdata]) - when [:>=, 1] then 
iseq.push([:opt_ge, cdata]) - when [:<<, 1] then iseq.push([:opt_ltlt, cdata]) - when [:[], 1] then iseq.push([:opt_aref, cdata]) - when [:&, 1] then iseq.push([:opt_and, cdata]) - when [:|, 1] then iseq.push([:opt_or, cdata]) - when [:[]=, 2] then iseq.push([:opt_aset, cdata]) + when [:length, 0] then push([:opt_length, cdata]) + when [:size, 0] then push([:opt_size, cdata]) + when [:empty?, 0] then push([:opt_empty_p, cdata]) + when [:nil?, 0] then push([:opt_nil_p, cdata]) + when [:succ, 0] then push([:opt_succ, cdata]) + when [:!, 0] then push([:opt_not, cdata]) + when [:+, 1] then push([:opt_plus, cdata]) + when [:-, 1] then push([:opt_minus, cdata]) + when [:*, 1] then push([:opt_mult, cdata]) + when [:/, 1] then push([:opt_div, cdata]) + when [:%, 1] then push([:opt_mod, cdata]) + when [:==, 1] then push([:opt_eq, cdata]) + when [:=~, 1] then push([:opt_regexpmatch2, cdata]) + when [:<, 1] then push([:opt_lt, cdata]) + when [:<=, 1] then push([:opt_le, cdata]) + when [:>, 1] then push([:opt_gt, cdata]) + when [:>=, 1] then push([:opt_ge, cdata]) + when [:<<, 1] then push([:opt_ltlt, cdata]) + when [:[], 1] then push([:opt_aref, cdata]) + when [:&, 1] then push([:opt_and, cdata]) + when [:|, 1] then push([:opt_or, cdata]) + when [:[]=, 2] then push([:opt_aset, cdata]) when [:!=, 1] eql_data = call_data(:==, 1, VM_CALL_ARGS_SIMPLE) - iseq.push([:opt_neq, eql_data, cdata]) + push([:opt_neq, eql_data, cdata]) else - iseq.push([:opt_send_without_block, cdata]) + push([:opt_send_without_block, cdata]) end else - iseq.push([:send, cdata, block_iseq]) + push([:send, cdata, block_iseq]) end else - iseq.push([:send, cdata, block_iseq]) + push([:send, cdata, block_iseq]) end end @@ -750,30 +656,29 @@ def setclassvariable(name) stack.change_by(-1) if RUBY_VERSION >= "3.0" - iseq.push([:setclassvariable, name, iseq.inline_storage_for(name)]) + push([:setclassvariable, name, inline_storage_for(name)]) else - iseq.push([:setclassvariable, name]) + push([:setclassvariable, 
name]) end end def setconstant(name) stack.change_by(-2) - iseq.push([:setconstant, name]) + push([:setconstant, name]) end def setglobal(name) stack.change_by(-1) - iseq.push([:setglobal, name]) + push([:setglobal, name]) end def setinstancevariable(name) stack.change_by(-1) if RUBY_VERSION >= "3.2" - iseq.push([:setinstancevariable, name, iseq.inline_storage]) + push([:setinstancevariable, name, inline_storage]) else - inline_storage = iseq.inline_storage_for(name) - iseq.push([:setinstancevariable, name, inline_storage]) + push([:setinstancevariable, name, inline_storage_for(name)]) end end @@ -787,40 +692,40 @@ def setlocal(index, level) # scope, respectively, and requires fewer operands. case level when 0 - iseq.push([:setlocal_WC_0, index]) + push([:setlocal_WC_0, index]) when 1 - iseq.push([:setlocal_WC_1, index]) + push([:setlocal_WC_1, index]) else - iseq.push([:setlocal, index, level]) + push([:setlocal, index, level]) end else - iseq.push([:setlocal, index, level]) + push([:setlocal, index, level]) end end def setn(number) stack.change_by(-1 + 1) - iseq.push([:setn, number]) + push([:setn, number]) end def splatarray(flag) stack.change_by(-1 + 1) - iseq.push([:splatarray, flag]) + push([:splatarray, flag]) end def swap stack.change_by(-2 + 2) - iseq.push([:swap]) + push([:swap]) end def topn(number) stack.change_by(+1) - iseq.push([:topn, number]) + push([:topn, number]) end def toregexp(options, length) stack.change_by(-length + 1) - iseq.push([:toregexp, options, length]) + push([:toregexp, options, length]) end private @@ -830,6 +735,126 @@ def toregexp(options, length) def call_data(method_id, argc, flag) { mid: method_id, flag: flag, orig_argc: argc } end + + def serialize(insn) + case insn[0] + when :checkkeyword, :getblockparam, :getblockparamproxy, :getlocal_WC_0, + :getlocal_WC_1, :getlocal, :setlocal_WC_0, :setlocal_WC_1, + :setlocal + iseq = self + + case insn[0] + when :getlocal_WC_1, :setlocal_WC_1 + iseq = iseq.parent_iseq + when 
:getblockparam, :getblockparamproxy, :getlocal, :setlocal + insn[2].times { iseq = iseq.parent_iseq } + end + + # Here we need to map the local variable index to the offset + # from the top of the stack where it will be stored. + [insn[0], iseq.local_table.offset(insn[1]), *insn[2..]] + when :defineclass + [insn[0], insn[1], insn[2].to_a, insn[3]] + when :definemethod, :definesmethod + [insn[0], insn[1], insn[2].to_a] + when :send + # For any instructions that push instruction sequences onto the + # stack, we need to call #to_a on them as well. + [insn[0], insn[1], (insn[2].to_a if insn[2])] + when :once + [insn[0], insn[1].to_a, insn[2]] + else + insn + end + end + end + + # This class is responsible for taking a compiled instruction sequence and + # walking through it to generate equivalent Ruby code. + class Disassembler + attr_reader :iseq + + def initialize(iseq) + @iseq = iseq + end + + def to_ruby + stack = [] + + iseq.insns.each do |insn| + case insn[0] + when :getlocal_WC_0 + value = iseq.local_table.locals[insn[1]].name.to_s + stack << VarRef.new( + value: Ident.new(value: value, location: Location.default), + location: Location.default + ) + when :leave + stack << ReturnNode.new( + arguments: + Args.new(parts: [stack.pop], location: Location.default), + location: Location.default + ) + when :opt_mult + left, right = stack.pop(2) + stack << Binary.new( + left: left, + operator: :*, + right: right, + location: Location.default + ) + when :opt_plus + left, right = stack.pop(2) + stack << Binary.new( + left: left, + operator: :+, + right: right, + location: Location.default + ) + when :putobject + case insn[1] + when Float + stack << FloatLiteral.new( + value: insn[1].inspect, + location: Location.default + ) + when Integer + stack << Int.new( + value: insn[1].inspect, + location: Location.default + ) + when Rational + stack << RationalLiteral.new( + value: insn[1].inspect, + location: Location.default + ) + else + raise "Unknown object type: 
#{insn[1].class.name}" + end + when :putobject_INT2FIX_1_ + stack << Int.new(value: "1", location: Location.default) + when :setlocal_WC_0 + target = + VarField.new( + value: + Ident.new( + value: iseq.local_table.locals[insn[1]].name.to_s, + location: Location.default + ), + location: Location.default + ) + stack << Assign.new( + target: target, + value: stack.pop, + location: Location.default + ) + else + raise "Unknown instruction #{insn[0]}" + end + end + + Statements.new(nil, body: stack, location: Location.default) + end end # These constants correspond to the putspecialobject instruction. They are From 154e75f9fe4f831237206fff080b03ad22d59d32 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 21 Nov 2022 10:02:31 -0500 Subject: [PATCH 07/21] Put child iseq methods on iseq --- lib/syntax_tree/compiler.rb | 57 ++++++++++++++++++------------------- lib/syntax_tree/yarv.rb | 46 ++++++++++++++++++++++++------ test/compiler_test.rb | 3 ++ 3 files changed, 69 insertions(+), 37 deletions(-) diff --git a/lib/syntax_tree/compiler.rb b/lib/syntax_tree/compiler.rb index 424a9cf5..926661cc 100644 --- a/lib/syntax_tree/compiler.rb +++ b/lib/syntax_tree/compiler.rb @@ -225,14 +225,17 @@ def visit_CHAR(node) end def visit_END(node) - name = "block in #{iseq.name}" once_iseq = - with_instruction_sequence(:block, name, node) do + with_child_iseq(iseq.block_child_iseq(node.location)) do postexe_iseq = - with_instruction_sequence(:block, name, node) do + with_child_iseq(iseq.block_child_iseq(node.location)) do + iseq.event(:RUBY_EVENT_B_CALL) + *statements, last_statement = node.statements.body visit_all(statements) with_last_statement { visit(last_statement) } + + iseq.event(:RUBY_EVENT_B_RETURN) iseq.leave end @@ -422,7 +425,7 @@ def visit_binary(node) end def visit_block(node) - with_instruction_sequence(:block, "block in #{iseq.name}", node) do + with_child_iseq(iseq.block_child_iseq(node.location)) do iseq.event(:RUBY_EVENT_B_CALL) visit(node.block_var) 
visit(node.bodystmt) @@ -606,7 +609,7 @@ def visit_case(node) def visit_class(node) name = node.constant.constant.value.to_sym class_iseq = - with_instruction_sequence(:class, "", node) do + with_child_iseq(iseq.class_child_iseq(name, node.location)) do iseq.event(:RUBY_EVENT_CLASS) visit(node.bodystmt) iseq.event(:RUBY_EVENT_END) @@ -673,7 +676,7 @@ def visit_const_path_ref(node) def visit_def(node) method_iseq = - with_instruction_sequence(:method, node.name.value, node) do + with_child_iseq(iseq.method_child_iseq(node.name.value, node.location)) do visit(node.params) if node.params iseq.event(:RUBY_EVENT_CALL) visit(node.bodystmt) @@ -788,11 +791,7 @@ def visit_for(node) iseq.local_table.plain(name) block_iseq = - with_instruction_sequence( - :block, - "block in #{iseq.name}", - node.statements - ) do + with_child_iseq(iseq.block_child_iseq(node.statements.location)) do iseq.argument_options[:lead_num] ||= 0 iseq.argument_options[:lead_num] += 1 iseq.argument_options[:ambiguous_param0] = true @@ -896,7 +895,7 @@ def visit_label(node) def visit_lambda(node) lambda_iseq = - with_instruction_sequence(:block, "block in #{iseq.name}", node) do + with_child_iseq(iseq.block_child_iseq(node.location)) do iseq.event(:RUBY_EVENT_B_CALL) visit(node.params) visit(node.statements) @@ -947,7 +946,7 @@ def visit_mlhs(node) def visit_module(node) name = node.constant.constant.value.to_sym module_iseq = - with_instruction_sequence(:class, "", node) do + with_child_iseq(iseq.module_child_iseq(name, node.location)) do iseq.event(:RUBY_EVENT_CLASS) visit(node.bodystmt) iseq.event(:RUBY_EVENT_END) @@ -1168,7 +1167,18 @@ def visit_program(node) end end - with_instruction_sequence(:top, "", node) do + top_iseq = + YARV::InstructionSequence.new( + :top, + "", + nil, + node.location, + frozen_string_literal: frozen_string_literal, + operands_unification: operands_unification, + specialized_instruction: specialized_instruction + ) + + with_child_iseq(top_iseq) do visit_all(preexes) if 
statements.empty? @@ -1231,7 +1241,7 @@ def visit_sclass(node) iseq.putnil singleton_iseq = - with_instruction_sequence(:class, "singleton class", node) do + with_child_iseq(iseq.singleton_class_child_iseq(node.location)) do iseq.event(:RUBY_EVENT_CLASS) visit(node.bodystmt) iseq.event(:RUBY_EVENT_END) @@ -1702,24 +1712,13 @@ def visit_string_parts(node) # on the compiler. When we descend into a node that has its own # instruction sequence, this method can be called to temporarily set the # new value of the instruction sequence, yield, and then set it back. - def with_instruction_sequence(type, name, node) + def with_child_iseq(child_iseq) parent_iseq = iseq begin - iseq = - YARV::InstructionSequence.new( - type, - name, - parent_iseq, - node.location, - frozen_string_literal: frozen_string_literal, - operands_unification: operands_unification, - specialized_instruction: specialized_instruction - ) - - @iseq = iseq + @iseq = child_iseq yield - iseq + child_iseq ensure @iseq = parent_iseq end diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index b6c3468c..12d1dba2 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -210,14 +210,6 @@ def length end end - def each_child - insns.each do |insn| - insn[1..].each do |operand| - yield operand if operand.is_a?(InstructionSequence) - end - end - end - def eval compiled = to_a @@ -253,6 +245,44 @@ def to_a ] end + ########################################################################## + # Child instruction sequence methods + ########################################################################## + + def child_iseq(type, name, location) + InstructionSequence.new( + type, + name, + self, + location, + frozen_string_literal: frozen_string_literal, + operands_unification: operands_unification, + specialized_instruction: specialized_instruction + ) + end + + def block_child_iseq(location) + current = self + current = current.parent_iseq while current.type == :block + child_iseq(:block, "block 
in #{current.name}", location) + end + + def class_child_iseq(name, location) + child_iseq(:class, "", location) + end + + def method_child_iseq(name, location) + child_iseq(:method, name, location) + end + + def module_child_iseq(name, location) + child_iseq(:class, "", location) + end + + def singleton_class_child_iseq(location) + child_iseq(:class, "singleton class", location) + end + ########################################################################## # Instruction push methods ########################################################################## diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 3b8c0ea2..27bf993d 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -6,6 +6,9 @@ module SyntaxTree class CompilerTest < Minitest::Test CASES = [ + # Hooks + "BEGIN { a = 1 }", + "a = 1; END { a = 1 }; a", # Various literals placed on the stack "true", "false", From df9f6220c009126f0a5b02c4a618ec54548d6e43 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 21 Nov 2022 10:19:30 -0500 Subject: [PATCH 08/21] Test out disassembler --- lib/syntax_tree/yarv.rb | 171 ++++++++++++++++++++++++++++------------ test/yarv_test.rb | 46 +++++++++++ 2 files changed, 166 insertions(+), 51 deletions(-) create mode 100644 test/yarv_test.rb diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index 12d1dba2..93f2ac06 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -802,6 +802,65 @@ def serialize(insn) # This class is responsible for taking a compiled instruction sequence and # walking through it to generate equivalent Ruby code. 
class Disassembler + module DSL + def Args(parts) + Args.new(parts: parts, location: Location.default) + end + + def ArgParen(arguments) + ArgParen.new(arguments: arguments, location: Location.default) + end + + def Assign(target, value) + Assign.new(target: target, value: value, location: Location.default) + end + + def Binary(left, operator, right) + Binary.new(left: left, operator: operator, right: right, location: Location.default) + end + + def CallNode(receiver, operator, message, arguments) + CallNode.new(receiver: receiver, operator: operator, message: message, arguments: arguments, location: Location.default) + end + + def FloatLiteral(value) + FloatLiteral.new(value: value, location: Location.default) + end + + def Ident(value) + Ident.new(value: value, location: Location.default) + end + + def Int(value) + Int.new(value: value, location: Location.default) + end + + def Period(value) + Period.new(value: value, location: Location.default) + end + + def Program(statements) + Program.new(statements: statements, location: Location.default) + end + + def ReturnNode(arguments) + ReturnNode.new(arguments: arguments, location: Location.default) + end + + def Statements(body) + Statements.new(nil, body: body, location: Location.default) + end + + def VarField(value) + VarField.new(value: value, location: Location.default) + end + + def VarRef(value) + VarRef.new(value: value, location: Location.default) + end + end + + include DSL attr_reader :iseq def initialize(iseq) @@ -812,78 +871,88 @@ def to_ruby stack = [] iseq.insns.each do |insn| + # skip line numbers and events + next unless insn.is_a?(Array) + case insn[0] when :getlocal_WC_0 - value = iseq.local_table.locals[insn[1]].name.to_s - stack << VarRef.new( - value: Ident.new(value: value, location: Location.default), - location: Location.default - ) + stack << VarRef(Ident(local_name(insn[1], 0))) when :leave - stack << ReturnNode.new( - arguments: - Args.new(parts: [stack.pop], location: Location.default), - 
location: Location.default - ) + stack << ReturnNode(Args([stack.pop])) + when :opt_and + left, right = stack.pop(2) + stack << Binary(left, :&, right) + when :opt_div + left, right = stack.pop(2) + stack << Binary(left, :/, right) + when :opt_eq + left, right = stack.pop(2) + stack << Binary(left, :==, right) + when :opt_ge + left, right = stack.pop(2) + stack << Binary(left, :>=, right) + when :opt_gt + left, right = stack.pop(2) + stack << Binary(left, :>, right) + when :opt_le + left, right = stack.pop(2) + stack << Binary(left, :<=, right) + when :opt_lt + left, right = stack.pop(2) + stack << Binary(left, :<, right) + when :opt_ltlt + left, right = stack.pop(2) + stack << Binary(left, :<<, right) + when :opt_minus + left, right = stack.pop(2) + stack << Binary(left, :-, right) + when :opt_mod + left, right = stack.pop(2) + stack << Binary(left, :%, right) when :opt_mult left, right = stack.pop(2) - stack << Binary.new( - left: left, - operator: :*, - right: right, - location: Location.default - ) + stack << Binary(left, :*, right) + when :opt_neq + left, right = stack.pop(2) + stack << Binary(left, :"!=", right) + when :opt_or + left, right = stack.pop(2) + stack << Binary(left, :|, right) when :opt_plus left, right = stack.pop(2) - stack << Binary.new( - left: left, - operator: :+, - right: right, - location: Location.default - ) + stack << Binary(left, :+, right) + when :opt_send_without_block + receiver, *arguments = stack.pop(insn[1][:orig_argc] + 1) + stack << CallNode(receiver, Period("."), Ident(insn[1][:mid]), ArgParen(Args(arguments))) when :putobject case insn[1] when Float - stack << FloatLiteral.new( - value: insn[1].inspect, - location: Location.default - ) + stack << FloatLiteral(insn[1].inspect) when Integer - stack << Int.new( - value: insn[1].inspect, - location: Location.default - ) - when Rational - stack << RationalLiteral.new( - value: insn[1].inspect, - location: Location.default - ) + stack << Int(insn[1].inspect) else raise "Unknown 
object type: #{insn[1].class.name}" end + when :putobject_INT2FIX_0_ + stack << Int("0") when :putobject_INT2FIX_1_ - stack << Int.new(value: "1", location: Location.default) + stack << Int("1") when :setlocal_WC_0 - target = - VarField.new( - value: - Ident.new( - value: iseq.local_table.locals[insn[1]].name.to_s, - location: Location.default - ), - location: Location.default - ) - stack << Assign.new( - target: target, - value: stack.pop, - location: Location.default - ) + stack << Assign(VarField(Ident(local_name(insn[1], 0))), stack.pop) else raise "Unknown instruction #{insn[0]}" end end - Statements.new(nil, body: stack, location: Location.default) + Program(Statements(stack)) + end + + private + + def local_name(index, level) + current = iseq + level.times { current = current.parent_iseq } + current.local_table.locals[index].name.to_s end end diff --git a/test/yarv_test.rb b/test/yarv_test.rb new file mode 100644 index 00000000..57371ba3 --- /dev/null +++ b/test/yarv_test.rb @@ -0,0 +1,46 @@ +# frozen_string_literal: true + +return if !defined?(RubyVM::InstructionSequence) || RUBY_VERSION < "3.1" +require_relative "test_helper" + +module SyntaxTree + class YARVTest < Minitest::Test + CASES = { + "0" => "return 0\n", + "1" => "return 1\n", + "2" => "return 2\n", + "1.0" => "return 1.0\n", + "1 + 2" => "return 1 + 2\n", + "1 - 2" => "return 1 - 2\n", + "1 * 2" => "return 1 * 2\n", + "1 / 2" => "return 1 / 2\n", + "1 % 2" => "return 1 % 2\n", + "1 < 2" => "return 1 < 2\n", + "1 <= 2" => "return 1 <= 2\n", + "1 > 2" => "return 1 > 2\n", + "1 >= 2" => "return 1 >= 2\n", + "1 == 2" => "return 1 == 2\n", + "1 != 2" => "return 1 != 2\n", + "1 & 2" => "return 1 & 2\n", + "1 | 2" => "return 1 | 2\n", + "1 << 2" => "return 1 << 2\n", + "1 >> 2" => "return 1.>>(2)\n", + "1 ** 2" => "return 1.**(2)\n", + "a = 1; a" => "a = 1\nreturn a\n", + }.freeze + + CASES.each do |source, expected| + define_method("test_disassemble_#{source}") do + assert_disassembles(expected, 
source) + end + end + + private + + def assert_disassembles(expected, source) + iseq = SyntaxTree.parse(source).accept(Compiler.new) + actual = Formatter.format(source, YARV::Disassembler.new(iseq).to_ruby) + assert_equal expected, actual + end + end +end From 6c6b4376b88b27d911c577ab8c90de9c9cc47f95 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 21 Nov 2022 12:23:22 -0500 Subject: [PATCH 09/21] Add BF compiler --- lib/syntax_tree.rb | 3 + lib/syntax_tree/dsl.rb | 129 ++++++++ lib/syntax_tree/yarv.rb | 157 --------- lib/syntax_tree/yarv/bf.rb | 466 +++++++++++++++++++++++++++ lib/syntax_tree/yarv/disassembler.rb | 209 ++++++++++++ 5 files changed, 807 insertions(+), 157 deletions(-) create mode 100644 lib/syntax_tree/dsl.rb create mode 100644 lib/syntax_tree/yarv/bf.rb create mode 100644 lib/syntax_tree/yarv/disassembler.rb diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index 187ff74d..2cbfa2e4 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -26,8 +26,11 @@ require_relative "syntax_tree/pattern" require_relative "syntax_tree/search" +require_relative "syntax_tree/dsl" require_relative "syntax_tree/yarv" require_relative "syntax_tree/compiler" +require_relative "syntax_tree/yarv/bf" +require_relative "syntax_tree/yarv/disassembler" # Syntax Tree is a suite of tools built on top of the internal CRuby parser. 
It # provides the ability to generate a syntax tree from source, as well as the diff --git a/lib/syntax_tree/dsl.rb b/lib/syntax_tree/dsl.rb new file mode 100644 index 00000000..05911ee3 --- /dev/null +++ b/lib/syntax_tree/dsl.rb @@ -0,0 +1,129 @@ +# frozen_string_literal: true + +module SyntaxTree + module DSL + def ARef(collection, index) + ARef.new(collection: collection, index: index, location: Location.default) + end + + def ARefField(collection, index) + ARefField.new(collection: collection, index: index, location: Location.default) + end + + def Args(parts) + Args.new(parts: parts, location: Location.default) + end + + def ArgParen(arguments) + ArgParen.new(arguments: arguments, location: Location.default) + end + + def Assign(target, value) + Assign.new(target: target, value: value, location: Location.default) + end + + def Assoc(key, value) + Assoc.new(key: key, value: value, location: Location.default) + end + + def Binary(left, operator, right) + Binary.new(left: left, operator: operator, right: right, location: Location.default) + end + + def BlockNode(opening, block_var, bodystmt) + BlockNode.new(opening: opening, block_var: block_var, bodystmt: bodystmt, location: Location.default) + end + + def BodyStmt(statements, rescue_clause, else_keyword, else_clause, ensure_clause) + BodyStmt.new(statements: statements, rescue_clause: rescue_clause, else_keyword: else_keyword, else_clause: else_clause, ensure_clause: ensure_clause, location: Location.default) + end + + def CallNode(receiver, operator, message, arguments) + CallNode.new(receiver: receiver, operator: operator, message: message, arguments: arguments, location: Location.default) + end + + def Case(keyword, value, consequent) + Case.new(keyword: keyword, value: value, consequent: consequent, location: Location.default) + end + + def FloatLiteral(value) + FloatLiteral.new(value: value, location: Location.default) + end + + def GVar(value) + GVar.new(value: value, location: Location.default) + end + + 
def HashLiteral(lbrace, assocs) + HashLiteral.new(lbrace: lbrace, assocs: assocs, location: Location.default) + end + + def Ident(value) + Ident.new(value: value, location: Location.default) + end + + def IfNode(predicate, statements, consequent) + IfNode.new(predicate: predicate, statements: statements, consequent: consequent, location: Location.default) + end + + def Int(value) + Int.new(value: value, location: Location.default) + end + + def Kw(value) + Kw.new(value: value, location: Location.default) + end + + def LBrace(value) + LBrace.new(value: value, location: Location.default) + end + + def MethodAddBlock(call, block) + MethodAddBlock.new(call: call, block: block, location: Location.default) + end + + def Next(arguments) + Next.new(arguments: arguments, location: Location.default) + end + + def Op(value) + Op.new(value: value, location: Location.default) + end + + def OpAssign(target, operator, value) + OpAssign.new(target: target, operator: operator, value: value, location: Location.default) + end + + def Period(value) + Period.new(value: value, location: Location.default) + end + + def Program(statements) + Program.new(statements: statements, location: Location.default) + end + + def ReturnNode(arguments) + ReturnNode.new(arguments: arguments, location: Location.default) + end + + def Statements(body) + Statements.new(nil, body: body, location: Location.default) + end + + def SymbolLiteral(value) + SymbolLiteral.new(value: value, location: Location.default) + end + + def VarField(value) + VarField.new(value: value, location: Location.default) + end + + def VarRef(value) + VarRef.new(value: value, location: Location.default) + end + + def When(arguments, statements, consequent) + When.new(arguments: arguments, statements: statements, consequent: consequent, location: Location.default) + end + end +end diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index 93f2ac06..2224792a 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb 
@@ -799,163 +799,6 @@ def serialize(insn) end end - # This class is responsible for taking a compiled instruction sequence and - # walking through it to generate equivalent Ruby code. - class Disassembler - module DSL - def Args(parts) - Args.new(parts: parts, location: Location.default) - end - - def ArgParen(arguments) - ArgParen.new(arguments: arguments, location: Location.default) - end - - def Assign(target, value) - Assign.new(target: target, value: value, location: Location.default) - end - - def Binary(left, operator, right) - Binary.new(left: left, operator: operator, right: right, location: Location.default) - end - - def CallNode(receiver, operator, message, arguments) - CallNode.new(receiver: receiver, operator: operator, message: message, arguments: arguments, location: Location.default) - end - - def FloatLiteral(value) - FloatLiteral.new(value: value, location: Location.default) - end - - def Ident(value) - Ident.new(value: value, location: Location.default) - end - - def Int(value) - Int.new(value: value, location: Location.default) - end - - def Period(value) - Period.new(value: value, location: Location.default) - end - - def Program(statements) - Program.new(statements: statements, location: Location.default) - end - - def ReturnNode(arguments) - ReturnNode.new(arguments: arguments, location: Location.default) - end - - def Statements(body) - Statements.new(nil, body: body, location: Location.default) - end - - def VarField(value) - VarField.new(value: value, location: Location.default) - end - - def VarRef(value) - VarRef.new(value: value, location: Location.default) - end - end - - include DSL - attr_reader :iseq - - def initialize(iseq) - @iseq = iseq - end - - def to_ruby - stack = [] - - iseq.insns.each do |insn| - # skip line numbers and events - next unless insn.is_a?(Array) - - case insn[0] - when :getlocal_WC_0 - stack << VarRef(Ident(local_name(insn[1], 0))) - when :leave - stack << ReturnNode(Args([stack.pop])) - when :opt_and - left, 
right = stack.pop(2) - stack << Binary(left, :&, right) - when :opt_div - left, right = stack.pop(2) - stack << Binary(left, :/, right) - when :opt_eq - left, right = stack.pop(2) - stack << Binary(left, :==, right) - when :opt_ge - left, right = stack.pop(2) - stack << Binary(left, :>=, right) - when :opt_gt - left, right = stack.pop(2) - stack << Binary(left, :>, right) - when :opt_le - left, right = stack.pop(2) - stack << Binary(left, :<=, right) - when :opt_lt - left, right = stack.pop(2) - stack << Binary(left, :<, right) - when :opt_ltlt - left, right = stack.pop(2) - stack << Binary(left, :<<, right) - when :opt_minus - left, right = stack.pop(2) - stack << Binary(left, :-, right) - when :opt_mod - left, right = stack.pop(2) - stack << Binary(left, :%, right) - when :opt_mult - left, right = stack.pop(2) - stack << Binary(left, :*, right) - when :opt_neq - left, right = stack.pop(2) - stack << Binary(left, :"!=", right) - when :opt_or - left, right = stack.pop(2) - stack << Binary(left, :|, right) - when :opt_plus - left, right = stack.pop(2) - stack << Binary(left, :+, right) - when :opt_send_without_block - receiver, *arguments = stack.pop(insn[1][:orig_argc] + 1) - stack << CallNode(receiver, Period("."), Ident(insn[1][:mid]), ArgParen(Args(arguments))) - when :putobject - case insn[1] - when Float - stack << FloatLiteral(insn[1].inspect) - when Integer - stack << Int(insn[1].inspect) - else - raise "Unknown object type: #{insn[1].class.name}" - end - when :putobject_INT2FIX_0_ - stack << Int("0") - when :putobject_INT2FIX_1_ - stack << Int("1") - when :setlocal_WC_0 - stack << Assign(VarField(Ident(local_name(insn[1], 0))), stack.pop) - else - raise "Unknown instruction #{insn[0]}" - end - end - - Program(Statements(stack)) - end - - private - - def local_name(index, level) - current = iseq - level.times { current = current.parent_iseq } - current.local_table.locals[index].name.to_s - end - end - # These constants correspond to the putspecialobject 
instruction. They are # used to represent special objects that are pushed onto the stack. VM_SPECIAL_OBJECT_VMCORE = 1 diff --git a/lib/syntax_tree/yarv/bf.rb b/lib/syntax_tree/yarv/bf.rb new file mode 100644 index 00000000..b826ebf2 --- /dev/null +++ b/lib/syntax_tree/yarv/bf.rb @@ -0,0 +1,466 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # Parses the given source code into a syntax tree, compiles that syntax tree + # into YARV bytecode. + class Bf + class Node + def format(q) + Format.new(q).visit(self) + end + + def pretty_print(q) + PrettyPrint.new(q).visit(self) + end + end + + # The root node of the syntax tree. + class Root < Node + attr_reader :nodes, :location + + def initialize(nodes:, location:) + @nodes = nodes + @location = location + end + + def accept(visitor) + visitor.visit_root(self) + end + + def child_nodes + nodes + end + + alias deconstruct child_nodes + + def deconstruct_keys(keys) + { nodes: nodes, location: location } + end + end + + # [ ... 
] + class Loop < Node + attr_reader :nodes, :location + + def initialize(nodes:, location:) + @nodes = nodes + @location = location + end + + def accept(visitor) + visitor.visit_loop(self) + end + + def child_nodes + nodes + end + + alias deconstruct child_nodes + + def deconstruct_keys(keys) + { nodes: nodes, location: location } + end + end + + # + + class Increment < Node + attr_reader :location + + def initialize(location:) + @location = location + end + + def accept(visitor) + visitor.visit_increment(self) + end + + def child_nodes + [] + end + + alias deconstruct child_nodes + + def deconstruct_keys(keys) + { value: "+", location: location } + end + end + + # - + class Decrement < Node + attr_reader :location + + def initialize(location:) + @location = location + end + + def accept(visitor) + visitor.visit_decrement(self) + end + + def child_nodes + [] + end + + alias deconstruct child_nodes + + def deconstruct_keys(keys) + { value: "-", location: location } + end + end + + # > + class ShiftRight < Node + attr_reader :location + + def initialize(location:) + @location = location + end + + def accept(visitor) + visitor.visit_shift_right(self) + end + + def child_nodes + [] + end + + alias deconstruct child_nodes + + def deconstruct_keys(keys) + { value: ">", location: location } + end + end + + # < + class ShiftLeft < Node + attr_reader :location + + def initialize(location:) + @location = location + end + + def accept(visitor) + visitor.visit_shift_left(self) + end + + def child_nodes + [] + end + + alias deconstruct child_nodes + + def deconstruct_keys(keys) + { value: "<", location: location } + end + end + + # , + class Input < Node + attr_reader :location + + def initialize(location:) + @location = location + end + + def accept(visitor) + visitor.visit_input(self) + end + + def child_nodes + [] + end + + alias deconstruct child_nodes + + def deconstruct_keys(keys) + { value: ",", location: location } + end + end + + # . 
+ class Output < Node + attr_reader :location + + def initialize(location:) + @location = location + end + + def accept(visitor) + visitor.visit_output(self) + end + + def child_nodes + [] + end + + alias deconstruct child_nodes + + def deconstruct_keys(keys) + { value: ".", location: location } + end + end + + # Allows visiting the syntax tree recursively. + class Visitor + def visit(node) + node.accept(self) + end + + def visit_all(nodes) + nodes.map { |node| visit(node) } + end + + def visit_child_nodes(node) + visit_all(node.child_nodes) + end + + # Visit a Root node. + alias visit_root visit_child_nodes + + # Visit a Loop node. + alias visit_loop visit_child_nodes + + # Visit an Increment node. + alias visit_increment visit_child_nodes + + # Visit a Decrement node. + alias visit_decrement visit_child_nodes + + # Visit a ShiftRight node. + alias visit_shift_right visit_child_nodes + + # Visit a ShiftLeft node. + alias visit_shift_left visit_child_nodes + + # Visit an Input node. + alias visit_input visit_child_nodes + + # Visit an Output node. + alias visit_output visit_child_nodes + end + + # Compiles the syntax tree into YARV bytecode. + class Compiler < Visitor + attr_reader :iseq + + def initialize + @iseq = InstructionSequence.new(:top, "", nil, Location.default) + end + + def visit_decrement(node) + change_by(-1) + end + + def visit_increment(node) + change_by(1) + end + + def visit_input(node) + iseq.getglobal(:$tape) + iseq.getglobal(:$cursor) + iseq.getglobal(:$stdin) + iseq.send(:getc, 0, VM_CALL_ARGS_SIMPLE) + iseq.send(:ord, 0, VM_CALL_ARGS_SIMPLE) + iseq.send(:[]=, 2, VM_CALL_ARGS_SIMPLE) + end + + def visit_loop(node) + start_label = iseq.label + + # First, we're going to compare the value at the current cursor to 0. + # If it's 0, then we'll jump past the loop. Otherwise we'll execute + # the loop. 
+ iseq.getglobal(:$tape) + iseq.getglobal(:$cursor) + iseq.send(:[], 1, VM_CALL_ARGS_SIMPLE) + iseq.putobject(0) + iseq.send(:==, 1, VM_CALL_ARGS_SIMPLE) + branchunless = iseq.branchunless(-1) + + # Otherwise, here we'll execute the loop. + visit_nodes(node.nodes) + + # Now that we've visited all of the child nodes, we need to jump back + # to the start of the loop. + iseq.jump(start_label) + + # Now that we have all of the instructions in place, we can patch the + # branchunless to point to the next instruction for skipping the loop. + branchunless[1] = iseq.label + end + + def visit_output(node) + iseq.getglobal(:$stdout) + iseq.getglobal(:$tape) + iseq.getglobal(:$cursor) + iseq.send(:[], 1, VM_CALL_ARGS_SIMPLE) + iseq.send(:chr, 0, VM_CALL_ARGS_SIMPLE) + iseq.send(:putc, 1, VM_CALL_ARGS_SIMPLE) + end + + def visit_root(node) + iseq.duphash({ 0 => 0 }) + iseq.setglobal(:$tape) + iseq.getglobal(:$tape) + iseq.putobject(0) + iseq.send(:default=, 1, VM_CALL_ARGS_SIMPLE) + + iseq.putobject(0) + iseq.setglobal(:$cursor) + + visit_nodes(node.nodes) + + iseq.putself + iseq.send(:exit, 0, VM_CALL_ARGS_SIMPLE) + iseq + end + + def visit_shift_left(node) + shift_by(-1) + end + + def visit_shift_right(node) + shift_by(1) + end + + private + + def change_by(value) + iseq.getglobal(:$tape) + iseq.getglobal(:$cursor) + iseq.getglobal(:$tape) + iseq.getglobal(:$cursor) + iseq.send(:[], 1, VM_CALL_ARGS_SIMPLE) + + if value < 0 + iseq.putobject(-value) + iseq.send(:-, 1, VM_CALL_ARGS_SIMPLE) + else + iseq.putobject(value) + iseq.send(:+, 1, VM_CALL_ARGS_SIMPLE) + end + + iseq.send(:[]=, 2, VM_CALL_ARGS_SIMPLE) + end + + def shift_by(value) + iseq.getglobal(:$cursor) + + if value < 0 + iseq.putobject(-value) + iseq.send(:-, 1, VM_CALL_ARGS_SIMPLE) + else + iseq.putobject(value) + iseq.send(:+, 1, VM_CALL_ARGS_SIMPLE) + end + + iseq.setglobal(:$cursor) + end + + def visit_nodes(nodes) + nodes + .chunk do |child| + case child + when Increment, Decrement + :change + when ShiftLeft, 
ShiftRight + :shift + else + :default + end + end + .each do |type, children| + case type + when :change + value = 0 + children.each { |child| value += child.is_a?(Increment) ? 1 : -1 } + change_by(value) + when :shift + value = 0 + children.each { |child| value += child.is_a?(ShiftRight) ? 1 : -1 } + shift_by(value) + else + visit_all(children) + end + end + end + end + + class Error < StandardError + end + + attr_reader :source + + def initialize(source) + @source = source + end + + def compile + Root.new(nodes: parse_segment(source, 0), location: 0...source.length).accept(Compiler.new) + end + + private + + def parse_segment(segment, offset) + index = 0 + nodes = [] + + while index < segment.length + location = offset + index + + case segment[index] + when "+" + nodes << Increment.new(location: location...(location + 1)) + index += 1 + when "-" + nodes << Decrement.new(location: location...(location + 1)) + index += 1 + when ">" + nodes << ShiftRight.new(location: location...(location + 1)) + index += 1 + when "<" + nodes << ShiftLeft.new(location: location...(location + 1)) + index += 1 + when "." 
+ nodes << Output.new(location: location...(location + 1)) + index += 1 + when "," + nodes << Input.new(location: location...(location + 1)) + index += 1 + when "[" + matched = 1 + end_index = index + 1 + + while matched != 0 && end_index < segment.length + case segment[end_index] + when "[" + matched += 1 + when "]" + matched -= 1 + end + + end_index += 1 + end + + raise Error, "Unmatched start loop" if matched != 0 + + content = segment[(index + 1)...(end_index - 1)] + nodes << Loop.new( + nodes: parse_segment(content, offset + index + 1), + location: location...(offset + end_index) + ) + + index = end_index + when "]" + raise Error, "Unmatched end loop" + else + index += 1 + end + end + + nodes + end + end + end +end diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb new file mode 100644 index 00000000..51d6fc08 --- /dev/null +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -0,0 +1,209 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This class is responsible for taking a compiled instruction sequence and + # walking through it to generate equivalent Ruby code. 
+ class Disassembler + include DSL + attr_reader :iseq, :label_name, :label_field, :label_ref + + def initialize(iseq) + @iseq = iseq + + @label_name = "__disasm_label" + @label_field = VarField(Ident(label_name)) + @label_ref = VarRef(Ident(label_name)) + end + + def to_ruby + Program(Statements(disassemble(iseq.insns))) + end + + private + + def node_for(value) + case value + when Integer + Int(value.to_s) + when Symbol + SymbolLiteral(Ident(value.to_s)) + end + end + + def disassemble(insns) + label = :label_0 + clauses = {} + clause = [] + + insns.each do |insn| + if insn.is_a?(Symbol) && insn.start_with?("label_") + clause << Assign(label_field, node_for(insn)) unless clause.last.is_a?(Next) + clauses[label] = clause + clause = [] + label = insn + next + end + + case insn[0] + when :branchunless + clause << IfNode(clause.pop, Statements([Assign(label_field, node_for(insn[1])), Next(Args([]))]), nil) + when :dup + clause << clause.last + when :duphash + assocs = insn[1].map { |key, value| Assoc(node_for(key), node_for(value)) } + clause << HashLiteral(LBrace("{"), assocs) + when :getglobal + clause << VarRef(GVar(insn[1].to_s)) + when :getlocal_WC_0 + clause << VarRef(Ident(local_name(insn[1], 0))) + when :jump + clause << Assign(label_field, node_for(insn[1])) + clause << Next(Args([])) + when :leave + clause << ReturnNode(Args([clause.pop])) + when :opt_and + left, right = clause.pop(2) + clause << Binary(left, :&, right) + when :opt_aref + collection, arg = clause.pop(2) + clause << ARef(collection, Args([arg])) + when :opt_aset + collection, arg, value = clause.pop(3) + + if value.is_a?(Binary) && value.left.is_a?(ARef) && collection === value.left.collection && arg === value.left.index.parts[0] + clause << OpAssign(ARefField(collection, Args([arg])), Op("#{value.operator}="), value.right) + else + clause << Assign(ARefField(collection, Args([arg])), value) + end + when :opt_div + left, right = clause.pop(2) + clause << Binary(left, :/, right) + when 
:opt_eq + left, right = clause.pop(2) + clause << Binary(left, :==, right) + when :opt_ge + left, right = clause.pop(2) + clause << Binary(left, :>=, right) + when :opt_gt + left, right = clause.pop(2) + clause << Binary(left, :>, right) + when :opt_le + left, right = clause.pop(2) + clause << Binary(left, :<=, right) + when :opt_lt + left, right = clause.pop(2) + clause << Binary(left, :<, right) + when :opt_ltlt + left, right = clause.pop(2) + clause << Binary(left, :<<, right) + when :opt_minus + left, right = clause.pop(2) + clause << Binary(left, :-, right) + when :opt_mod + left, right = clause.pop(2) + clause << Binary(left, :%, right) + when :opt_mult + left, right = clause.pop(2) + clause << Binary(left, :*, right) + when :opt_neq + left, right = clause.pop(2) + clause << Binary(left, :"!=", right) + when :opt_or + left, right = clause.pop(2) + clause << Binary(left, :|, right) + when :opt_plus + left, right = clause.pop(2) + clause << Binary(left, :+, right) + when :opt_send_without_block + if insn[1][:orig_argc] == 0 + clause << CallNode(clause.pop, Period("."), Ident(insn[1][:mid]), nil) + elsif insn[1][:orig_argc] == 1 && insn[1][:mid].end_with?("=") + receiver, argument = clause.pop(2) + clause << Assign(CallNode(receiver, Period("."), Ident(insn[1][:mid][0..-2]), nil), argument) + else + receiver, *arguments = clause.pop(insn[1][:orig_argc] + 1) + clause << CallNode(receiver, Period("."), Ident(insn[1][:mid]), ArgParen(Args(arguments))) + end + when :putobject + case insn[1] + when Float + clause << FloatLiteral(insn[1].inspect) + when Integer + clause << Int(insn[1].inspect) + else + raise "Unknown object type: #{insn[1].class.name}" + end + when :putobject_INT2FIX_0_ + clause << Int("0") + when :putobject_INT2FIX_1_ + clause << Int("1") + when :putself + clause << VarRef(Kw("self")) + when :setglobal + target = GVar(insn[1].to_s) + value = clause.pop + + if value.is_a?(Binary) && VarRef(target) === value.left + clause << OpAssign(VarField(target), 
Op("#{value.operator}="), value.right) + else + clause << Assign(VarField(target), value) + end + when :setlocal_WC_0 + target = Ident(local_name(insn[1], 0)) + value = clause.pop + + if value.is_a?(Binary) && VarRef(target) === value.left + clause << OpAssign(VarField(target), Op("#{value.operator}="), value.right) + else + clause << Assign(VarField(target), value) + end + else + raise "Unknown instruction #{insn[0]}" + end + end + + # If there's only one clause, then we don't need a case statement, and + # we can just disassemble the first clause. + clauses[label] = clause + return clauses.values.first if clauses.size == 1 + + # Here we're going to build up a big case statement that will handle all + # of the different labels. + current = nil + clauses.reverse_each do |label, clause| + current = When(Args([node_for(label)]), Statements(clause), current) + end + switch = Case(Kw("case"), label_ref, current) + + # Here we're going to make sure that any locals that were established in + # the label_0 block are initialized so that scoping rules work + # correctly. + stack = [] + locals = [label_name] + + clauses[:label_0].each do |node| + if node.is_a?(Assign) && node.target.is_a?(VarField) && node.target.value.is_a?(Ident) + value = node.target.value.value + next if locals.include?(value) + + stack << Assign(node.target, VarRef(Kw("nil"))) + locals << value + end + end + + # Finally, we'll set up the initial label and loop the entire case + # statement. 
+ stack << Assign(label_field, node_for(:label_0)) + stack << MethodAddBlock(CallNode(nil, nil, Ident("loop"), Args([])), BlockNode(Kw("do"), nil, BodyStmt(Statements([switch]), nil, nil, nil, nil))) + stack + end + + def local_name(index, level) + current = iseq + level.times { current = current.parent_iseq } + current.local_table.locals[index].name.to_s + end + end + end +end From a1236fd6c4e4a22292e2a1d52facb95ecdc7a208 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 21 Nov 2022 13:25:14 -0500 Subject: [PATCH 10/21] Default to VM_CALL_ARGS_SIMPLE --- lib/syntax_tree/compiler.rb | 20 ++++++------- lib/syntax_tree/dsl.rb | 4 +++ lib/syntax_tree/yarv.rb | 23 +++++++-------- lib/syntax_tree/yarv/bf.rb | 33 +++++++++++----------- lib/syntax_tree/yarv/disassembler.rb | 40 +++++++++++++++++--------- test/yarv_test.rb | 42 ++++++++++++++-------------- 6 files changed, 88 insertions(+), 74 deletions(-) diff --git a/lib/syntax_tree/compiler.rb b/lib/syntax_tree/compiler.rb index 926661cc..32b5f089 100644 --- a/lib/syntax_tree/compiler.rb +++ b/lib/syntax_tree/compiler.rb @@ -253,13 +253,13 @@ def visit_alias(node) iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CBASE) visit(node.left) visit(node.right) - iseq.send(:"core#set_method_alias", 3, YARV::VM_CALL_ARGS_SIMPLE) + iseq.send(:"core#set_method_alias", 3) end def visit_aref(node) visit(node.collection) visit(node.index) - iseq.send(:[], 1, YARV::VM_CALL_ARGS_SIMPLE) + iseq.send(:[], 1) end def visit_arg_block(node) @@ -313,7 +313,7 @@ def visit_assign(node) visit(node.target.index) visit(node.value) iseq.setn(3) - iseq.send(:[]=, 2, YARV::VM_CALL_ARGS_SIMPLE) + iseq.send(:[]=, 2) iseq.pop when ConstPathField names = constant_names(node.target) @@ -337,7 +337,7 @@ def visit_assign(node) visit(node.target) visit(node.value) iseq.setn(2) - iseq.send(:"#{node.target.name.value}=", 1, YARV::VM_CALL_ARGS_SIMPLE) + iseq.send(:"#{node.target.name.value}=", 1) iseq.pop when TopConstField name = 
node.target.constant.value.to_sym @@ -420,7 +420,7 @@ def visit_binary(node) else visit(node.left) visit(node.right) - iseq.send(node.operator, 1, YARV::VM_CALL_ARGS_SIMPLE) + iseq.send(node.operator, 1) end end @@ -981,7 +981,7 @@ def visit_mrhs(node) def visit_not(node) visit(node.statement) - iseq.send(:!, 0, YARV::VM_CALL_ARGS_SIMPLE) + iseq.send(:!, 0) end def visit_opassign(node) @@ -1367,7 +1367,7 @@ def visit_undef(node) iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CBASE) visit(symbol) - iseq.send(:"core#undef_method", 2, YARV::VM_CALL_ARGS_SIMPLE) + iseq.send(:"core#undef_method", 2) end end @@ -1523,7 +1523,7 @@ def visit_xstring_literal(node) def visit_yield(node) parts = argument_parts(node.arguments) visit_all(parts) - iseq.invokeblock(nil, parts.length, YARV::VM_CALL_ARGS_SIMPLE) + iseq.invokeblock(nil, parts.length) end def visit_zsuper(_node) @@ -1759,12 +1759,12 @@ def with_opassign(node) visit(node.target.index) iseq.dupn(2) - iseq.send(:[], 1, YARV::VM_CALL_ARGS_SIMPLE) + iseq.send(:[], 1) yield iseq.setn(3) - iseq.send(:[]=, 2, YARV::VM_CALL_ARGS_SIMPLE) + iseq.send(:[]=, 2) iseq.pop when ConstPathField name = node.target.constant.value.to_sym diff --git a/lib/syntax_tree/dsl.rb b/lib/syntax_tree/dsl.rb index 05911ee3..1d1324df 100644 --- a/lib/syntax_tree/dsl.rb +++ b/lib/syntax_tree/dsl.rb @@ -38,6 +38,10 @@ def BodyStmt(statements, rescue_clause, else_keyword, else_clause, ensure_clause BodyStmt.new(statements: statements, rescue_clause: rescue_clause, else_keyword: else_keyword, else_clause: else_clause, ensure_clause: ensure_clause, location: Location.default) end + def Break(arguments) + Break.new(arguments: arguments, location: Location.default) + end + def CallNode(receiver, operator, message, arguments) CallNode.new(receiver: receiver, operator: operator, message: message, arguments: arguments, location: Location.default) end diff --git a/lib/syntax_tree/yarv.rb 
b/lib/syntax_tree/yarv.rb index 2224792a..822844fb 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -460,7 +460,7 @@ def intern push([:intern]) end - def invokeblock(method_id, argc, flag) + def invokeblock(method_id, argc, flag = VM_CALL_ARGS_SIMPLE) stack.change_by(-argc + 1) push([:invokeblock, call_data(method_id, argc, flag)]) end @@ -547,7 +547,7 @@ def opt_newarray_max(length) push([:opt_newarray_max, length]) else newarray(length) - send(:max, 0, VM_CALL_ARGS_SIMPLE) + send(:max, 0) end end @@ -557,7 +557,7 @@ def opt_newarray_min(length) push([:opt_newarray_min, length]) else newarray(length) - send(:min, 0, VM_CALL_ARGS_SIMPLE) + send(:min, 0) end end @@ -569,22 +569,20 @@ def opt_setinlinecache(inline_storage) def opt_str_freeze(value) if specialized_instruction stack.change_by(+1) - push( - [:opt_str_freeze, value, call_data(:freeze, 0, VM_CALL_ARGS_SIMPLE)] - ) + push([:opt_str_freeze, value, call_data(:freeze, 0)]) else putstring(value) - send(:freeze, 0, VM_CALL_ARGS_SIMPLE) + send(:freeze, 0) end end def opt_str_uminus(value) if specialized_instruction stack.change_by(+1) - push([:opt_str_uminus, value, call_data(:-@, 0, VM_CALL_ARGS_SIMPLE)]) + push([:opt_str_uminus, value, call_data(:-@, 0)]) else putstring(value) - send(:-@, 0, VM_CALL_ARGS_SIMPLE) + send(:-@, 0) end end @@ -633,7 +631,7 @@ def putstring(object) push([:putstring, object]) end - def send(method_id, argc, flag, block_iseq = nil) + def send(method_id, argc, flag = VM_CALL_ARGS_SIMPLE, block_iseq = nil) stack.change_by(-(argc + 1) + 1) cdata = call_data(method_id, argc, flag) @@ -669,8 +667,7 @@ def send(method_id, argc, flag, block_iseq = nil) when [:|, 1] then push([:opt_or, cdata]) when [:[]=, 2] then push([:opt_aset, cdata]) when [:!=, 1] - eql_data = call_data(:==, 1, VM_CALL_ARGS_SIMPLE) - push([:opt_neq, eql_data, cdata]) + push([:opt_neq, call_data(:==, 1), cdata]) else push([:opt_send_without_block, cdata]) end @@ -762,7 +759,7 @@ def toregexp(options, 
length) # This creates a call data object that is used as the operand for the # send, invokesuper, and objtostring instructions. - def call_data(method_id, argc, flag) + def call_data(method_id, argc, flag = VM_CALL_ARGS_SIMPLE) { mid: method_id, flag: flag, orig_argc: argc } end diff --git a/lib/syntax_tree/yarv/bf.rb b/lib/syntax_tree/yarv/bf.rb index b826ebf2..16098190 100644 --- a/lib/syntax_tree/yarv/bf.rb +++ b/lib/syntax_tree/yarv/bf.rb @@ -260,9 +260,9 @@ def visit_input(node) iseq.getglobal(:$tape) iseq.getglobal(:$cursor) iseq.getglobal(:$stdin) - iseq.send(:getc, 0, VM_CALL_ARGS_SIMPLE) - iseq.send(:ord, 0, VM_CALL_ARGS_SIMPLE) - iseq.send(:[]=, 2, VM_CALL_ARGS_SIMPLE) + iseq.send(:getc, 0) + iseq.send(:ord, 0) + iseq.send(:[]=, 2) end def visit_loop(node) @@ -273,9 +273,9 @@ def visit_loop(node) # the loop. iseq.getglobal(:$tape) iseq.getglobal(:$cursor) - iseq.send(:[], 1, VM_CALL_ARGS_SIMPLE) + iseq.send(:[], 1) iseq.putobject(0) - iseq.send(:==, 1, VM_CALL_ARGS_SIMPLE) + iseq.send(:==, 1) branchunless = iseq.branchunless(-1) # Otherwise, here we'll execute the loop. 
@@ -294,9 +294,9 @@ def visit_output(node) iseq.getglobal(:$stdout) iseq.getglobal(:$tape) iseq.getglobal(:$cursor) - iseq.send(:[], 1, VM_CALL_ARGS_SIMPLE) - iseq.send(:chr, 0, VM_CALL_ARGS_SIMPLE) - iseq.send(:putc, 1, VM_CALL_ARGS_SIMPLE) + iseq.send(:[], 1) + iseq.send(:chr, 0) + iseq.send(:putc, 1) end def visit_root(node) @@ -304,15 +304,14 @@ def visit_root(node) iseq.setglobal(:$tape) iseq.getglobal(:$tape) iseq.putobject(0) - iseq.send(:default=, 1, VM_CALL_ARGS_SIMPLE) + iseq.send(:default=, 1) iseq.putobject(0) iseq.setglobal(:$cursor) visit_nodes(node.nodes) - iseq.putself - iseq.send(:exit, 0, VM_CALL_ARGS_SIMPLE) + iseq.leave iseq end @@ -331,17 +330,17 @@ def change_by(value) iseq.getglobal(:$cursor) iseq.getglobal(:$tape) iseq.getglobal(:$cursor) - iseq.send(:[], 1, VM_CALL_ARGS_SIMPLE) + iseq.send(:[], 1) if value < 0 iseq.putobject(-value) - iseq.send(:-, 1, VM_CALL_ARGS_SIMPLE) + iseq.send(:-, 1) else iseq.putobject(value) - iseq.send(:+, 1, VM_CALL_ARGS_SIMPLE) + iseq.send(:+, 1) end - iseq.send(:[]=, 2, VM_CALL_ARGS_SIMPLE) + iseq.send(:[]=, 2) end def shift_by(value) @@ -349,10 +348,10 @@ def shift_by(value) if value < 0 iseq.putobject(-value) - iseq.send(:-, 1, VM_CALL_ARGS_SIMPLE) + iseq.send(:-, 1) else iseq.putobject(value) - iseq.send(:+, 1, VM_CALL_ARGS_SIMPLE) + iseq.send(:+, 1) end iseq.setglobal(:$cursor) diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb index 51d6fc08..566ed984 100644 --- a/lib/syntax_tree/yarv/disassembler.rb +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -17,7 +17,7 @@ def initialize(iseq) end def to_ruby - Program(Statements(disassemble(iseq.insns))) + Program(disassemble(iseq)) end private @@ -31,12 +31,12 @@ def node_for(value) end end - def disassemble(insns) + def disassemble(iseq) label = :label_0 clauses = {} clause = [] - insns.each do |insn| + iseq.insns.each do |insn| if insn.is_a?(Symbol) && insn.start_with?("label_") clause << Assign(label_field, node_for(insn)) 
unless clause.last.is_a?(Next) clauses[label] = clause @@ -61,7 +61,8 @@ def disassemble(insns) clause << Assign(label_field, node_for(insn[1])) clause << Next(Args([])) when :leave - clause << ReturnNode(Args([clause.pop])) + value = Args([clause.pop]) + clause << (iseq.type == :top ? Break(value) : ReturnNode(value)) when :opt_and left, right = clause.pop(2) clause << Binary(left, :&, right) @@ -116,14 +117,27 @@ def disassemble(insns) left, right = clause.pop(2) clause << Binary(left, :+, right) when :opt_send_without_block - if insn[1][:orig_argc] == 0 - clause << CallNode(clause.pop, Period("."), Ident(insn[1][:mid]), nil) - elsif insn[1][:orig_argc] == 1 && insn[1][:mid].end_with?("=") - receiver, argument = clause.pop(2) - clause << Assign(CallNode(receiver, Period("."), Ident(insn[1][:mid][0..-2]), nil), argument) + if insn[1][:flag] & VM_CALL_FCALL > 0 + if insn[1][:orig_argc] == 0 + clause.pop + clause << CallNode(nil, nil, Ident(insn[1][:mid]), Args([])) + elsif insn[1][:orig_argc] == 1 && insn[1][:mid].end_with?("=") + _receiver, argument = clause.pop(2) + clause << Assign(CallNode(nil, nil, Ident(insn[1][:mid][0..-2]), nil), argument) + else + _receiver, *arguments = clause.pop(insn[1][:orig_argc] + 1) + clause << CallNode(nil, nil, Ident(insn[1][:mid]), ArgParen(Args(arguments))) + end else - receiver, *arguments = clause.pop(insn[1][:orig_argc] + 1) - clause << CallNode(receiver, Period("."), Ident(insn[1][:mid]), ArgParen(Args(arguments))) + if insn[1][:orig_argc] == 0 + clause << CallNode(clause.pop, Period("."), Ident(insn[1][:mid]), nil) + elsif insn[1][:orig_argc] == 1 && insn[1][:mid].end_with?("=") + receiver, argument = clause.pop(2) + clause << Assign(CallNode(receiver, Period("."), Ident(insn[1][:mid][0..-2]), nil), argument) + else + receiver, *arguments = clause.pop(insn[1][:orig_argc] + 1) + clause << CallNode(receiver, Period("."), Ident(insn[1][:mid]), ArgParen(Args(arguments))) + end end when :putobject case insn[1] @@ -166,7 +180,7 
@@ def disassemble(insns) # If there's only one clause, then we don't need a case statement, and # we can just disassemble the first clause. clauses[label] = clause - return clauses.values.first if clauses.size == 1 + return Statements(clauses.values.first) if clauses.size == 1 # Here we're going to build up a big case statement that will handle all # of the different labels. @@ -196,7 +210,7 @@ def disassemble(insns) # statement. stack << Assign(label_field, node_for(:label_0)) stack << MethodAddBlock(CallNode(nil, nil, Ident("loop"), Args([])), BlockNode(Kw("do"), nil, BodyStmt(Statements([switch]), nil, nil, nil, nil))) - stack + Statements(stack) end def local_name(index, level) diff --git a/test/yarv_test.rb b/test/yarv_test.rb index 57371ba3..da348224 100644 --- a/test/yarv_test.rb +++ b/test/yarv_test.rb @@ -6,27 +6,27 @@ module SyntaxTree class YARVTest < Minitest::Test CASES = { - "0" => "return 0\n", - "1" => "return 1\n", - "2" => "return 2\n", - "1.0" => "return 1.0\n", - "1 + 2" => "return 1 + 2\n", - "1 - 2" => "return 1 - 2\n", - "1 * 2" => "return 1 * 2\n", - "1 / 2" => "return 1 / 2\n", - "1 % 2" => "return 1 % 2\n", - "1 < 2" => "return 1 < 2\n", - "1 <= 2" => "return 1 <= 2\n", - "1 > 2" => "return 1 > 2\n", - "1 >= 2" => "return 1 >= 2\n", - "1 == 2" => "return 1 == 2\n", - "1 != 2" => "return 1 != 2\n", - "1 & 2" => "return 1 & 2\n", - "1 | 2" => "return 1 | 2\n", - "1 << 2" => "return 1 << 2\n", - "1 >> 2" => "return 1.>>(2)\n", - "1 ** 2" => "return 1.**(2)\n", - "a = 1; a" => "a = 1\nreturn a\n", + "0" => "break 0\n", + "1" => "break 1\n", + "2" => "break 2\n", + "1.0" => "break 1.0\n", + "1 + 2" => "break 1 + 2\n", + "1 - 2" => "break 1 - 2\n", + "1 * 2" => "break 1 * 2\n", + "1 / 2" => "break 1 / 2\n", + "1 % 2" => "break 1 % 2\n", + "1 < 2" => "break 1 < 2\n", + "1 <= 2" => "break 1 <= 2\n", + "1 > 2" => "break 1 > 2\n", + "1 >= 2" => "break 1 >= 2\n", + "1 == 2" => "break 1 == 2\n", + "1 != 2" => "break 1 != 2\n", + "1 & 2" => "break 1 & 
2\n", + "1 | 2" => "break 1 | 2\n", + "1 << 2" => "break 1 << 2\n", + "1 >> 2" => "break 1.>>(2)\n", + "1 ** 2" => "break 1.**(2)\n", + "a = 1; a" => "a = 1\nbreak a\n", }.freeze CASES.each do |source, expected| From d8815de6b2c00ae2001980d557cc62302e029123 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 21 Nov 2022 13:40:46 -0500 Subject: [PATCH 11/21] Add objects to wrap instructions --- .rubocop.yml | 3 + lib/syntax_tree.rb | 1 + lib/syntax_tree/compiler.rb | 109 ++- lib/syntax_tree/dsl.rb | 905 +++++++++++++++++++++- lib/syntax_tree/yarv.rb | 210 ++--- lib/syntax_tree/yarv/bf.rb | 553 ++++--------- lib/syntax_tree/yarv/disassembler.rb | 366 +++++---- lib/syntax_tree/yarv/instructions.rb | 1071 ++++++++++++++++++++++++++ test/yarv_test.rb | 11 +- 9 files changed, 2466 insertions(+), 763 deletions(-) create mode 100644 lib/syntax_tree/yarv/instructions.rb diff --git a/.rubocop.yml b/.rubocop.yml index d0bf0830..134a75dc 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -94,6 +94,9 @@ Style/MutableConstant: Style/NegatedIfElseCondition: Enabled: false +Style/Next: + Enabled: false + Style/NumericPredicate: Enabled: false diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index 2cbfa2e4..792ba20c 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -31,6 +31,7 @@ require_relative "syntax_tree/compiler" require_relative "syntax_tree/yarv/bf" require_relative "syntax_tree/yarv/disassembler" +require_relative "syntax_tree/yarv/instructions" # Syntax Tree is a suite of tools built on top of the internal CRuby parser. 
It # provides the ability to generate a syntax tree from source, as well as the diff --git a/lib/syntax_tree/compiler.rb b/lib/syntax_tree/compiler.rb index 32b5f089..8327a080 100644 --- a/lib/syntax_tree/compiler.rb +++ b/lib/syntax_tree/compiler.rb @@ -407,7 +407,7 @@ def visit_binary(node) iseq.pop visit(node.right) - branchunless[1] = iseq.label + branchunless.patch!(iseq) when :"||" visit(node.left) iseq.dup @@ -416,7 +416,7 @@ def visit_binary(node) iseq.pop visit(node.right) - branchif[1] = iseq.label + branchif.patch!(iseq) else visit(node.left) visit(node.right) @@ -567,7 +567,7 @@ def visit_call(node) flag |= YARV::VM_CALL_FCALL if node.receiver.nil? iseq.send(node.message.value.to_sym, argc, flag, block_iseq) - branchnil[1] = iseq.label if branchnil + branchnil.patch!(iseq) if branchnil end def visit_case(node) @@ -600,7 +600,7 @@ def visit_case(node) branches.each_with_index do |(clause, branchif), index| iseq.leave if index != 0 - branchif[1] = iseq.label + branchif.patch!(iseq) iseq.pop visit(clause) end @@ -616,21 +616,21 @@ def visit_class(node) iseq.leave end - flags = YARV::VM_DEFINECLASS_TYPE_CLASS + flags = YARV::DefineClass::TYPE_CLASS case node.constant when ConstPathRef - flags |= YARV::VM_DEFINECLASS_FLAG_SCOPED + flags |= YARV::DefineClass::FLAG_SCOPED visit(node.constant.parent) when ConstRef iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) when TopConstRef - flags |= YARV::VM_DEFINECLASS_FLAG_SCOPED + flags |= YARV::DefineClass::FLAG_SCOPED iseq.putobject(Object) end if node.superclass - flags |= YARV::VM_DEFINECLASS_FLAG_HAS_SUPERCLASS + flags |= YARV::DefineClass::FLAG_HAS_SUPERCLASS visit(node.superclass) else iseq.putnil @@ -675,16 +675,16 @@ def visit_const_path_ref(node) end def visit_def(node) - method_iseq = - with_child_iseq(iseq.method_child_iseq(node.name.value, node.location)) do - visit(node.params) if node.params - iseq.event(:RUBY_EVENT_CALL) - visit(node.bodystmt) - iseq.event(:RUBY_EVENT_RETURN) - iseq.leave - 
end - name = node.name.value.to_sym + method_iseq = iseq.method_child_iseq(name.to_s, node.location) + + with_child_iseq(method_iseq) do + visit(node.params) if node.params + iseq.event(:RUBY_EVENT_CALL) + visit(node.bodystmt) + iseq.event(:RUBY_EVENT_RETURN) + iseq.leave + end if node.target visit(node.target) @@ -714,18 +714,18 @@ def visit_defined(node) case value when Const iseq.putnil - iseq.defined(YARV::DEFINED_CONST, name, "constant") + iseq.defined(YARV::Defined::CONST, name, "constant") when CVar iseq.putnil - iseq.defined(YARV::DEFINED_CVAR, name, "class variable") + iseq.defined(YARV::Defined::CVAR, name, "class variable") when GVar iseq.putnil - iseq.defined(YARV::DEFINED_GVAR, name, "global-variable") + iseq.defined(YARV::Defined::GVAR, name, "global-variable") when Ident iseq.putobject("local-variable") when IVar iseq.putnil - iseq.defined(YARV::DEFINED_IVAR, name, "instance-variable") + iseq.defined(YARV::Defined::IVAR, name, "instance-variable") when Kw case name when :false @@ -742,13 +742,13 @@ def visit_defined(node) iseq.putself name = node.value.value.value.to_sym - iseq.defined(YARV::DEFINED_FUNC, name, "method") + iseq.defined(YARV::Defined::FUNC, name, "method") when YieldNode iseq.putnil - iseq.defined(YARV::DEFINED_YIELD, false, "yield") + iseq.defined(YARV::Defined::YIELD, false, "yield") when ZSuper iseq.putnil - iseq.defined(YARV::DEFINED_ZSUPER, false, "super") + iseq.defined(YARV::Defined::ZSUPER, false, "super") else iseq.putobject("expression") end @@ -842,7 +842,7 @@ def visit_if(node) if last_statement? iseq.leave - branchunless[1] = iseq.label + branchunless.patch!(iseq) node.consequent ? 
visit(node.consequent) : iseq.putnil else @@ -850,11 +850,11 @@ def visit_if(node) if node.consequent jump = iseq.jump(-1) - branchunless[1] = iseq.label + branchunless.patch!(iseq) visit(node.consequent) jump[1] = iseq.label else - branchunless[1] = iseq.label + branchunless.patch!(iseq) end end end @@ -953,16 +953,16 @@ def visit_module(node) iseq.leave end - flags = YARV::VM_DEFINECLASS_TYPE_MODULE + flags = YARV::DefineClass::TYPE_MODULE case node.constant when ConstPathRef - flags |= YARV::VM_DEFINECLASS_FLAG_SCOPED + flags |= YARV::DefineClass::FLAG_SCOPED visit(node.constant.parent) when ConstRef iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) when TopConstRef - flags |= YARV::VM_DEFINECLASS_FLAG_SCOPED + flags |= YARV::DefineClass::FLAG_SCOPED iseq.putobject(Object) end @@ -1004,15 +1004,15 @@ def visit_opassign(node) case node.target when ARefField iseq.leave - branchunless[1] = iseq.label + branchunless.patch!(iseq) iseq.setn(3) iseq.adjuststack(3) when ConstPathField, TopConstField - branchunless[1] = iseq.label + branchunless.patch!(iseq) iseq.swap iseq.pop else - branchunless[1] = iseq.label + branchunless.patch!(iseq) end when :"||" if node.target.is_a?(ConstPathField) || node.target.is_a?(TopConstField) @@ -1034,11 +1034,11 @@ def visit_opassign(node) if node.target.is_a?(ARefField) iseq.leave - branchif[1] = iseq.label + branchif.patch!(iseq) iseq.setn(3) iseq.adjuststack(3) else - branchif[1] = iseq.label + branchif.patch!(iseq) end end else @@ -1092,7 +1092,10 @@ def visit_params(node) if node.keywords.any? argument_options[:kwbits] = 0 argument_options[:keyword] = [] - checkkeywords = [] + + keyword_bits_name = node.keyword_rest ? 3 : 2 + iseq.argument_size += 1 + keyword_bits_index = iseq.local_table.locals.size + node.keywords.size node.keywords.each_with_index do |(keyword, value), keyword_index| name = keyword.value.chomp(":").to_sym @@ -1105,24 +1108,18 @@ def visit_params(node) if value.nil? 
argument_options[:keyword] << name elsif (compiled = RubyVisitor.compile(value)) - compiled = value.accept(RubyVisitor.new) argument_options[:keyword] << [name, compiled] else argument_options[:keyword] << [name] - checkkeywords << iseq.checkkeyword(-1, keyword_index) + iseq.checkkeyword(keyword_bits_index, keyword_index) branchif = iseq.branchif(-1) visit(value) iseq.setlocal(index, 0) - branchif[1] = iseq.label + branchif.patch!(iseq) end end - name = node.keyword_rest ? 3 : 2 - iseq.argument_size += 1 - iseq.local_table.plain(name) - - lookup = iseq.local_table.find(name, 0) - checkkeywords.each { |checkkeyword| checkkeyword[1] = lookup.index } + iseq.local_table.plain(keyword_bits_name) end if node.keyword_rest.is_a?(ArgsForward) @@ -1251,7 +1248,7 @@ def visit_sclass(node) iseq.defineclass( :singletonclass, singleton_iseq, - YARV::VM_DEFINECLASS_TYPE_SINGLETON_CLASS + YARV::DefineClass::TYPE_SINGLETON_CLASS ) end @@ -1378,7 +1375,7 @@ def visit_unless(node) if last_statement? iseq.leave - branchunless[1] = iseq.label + branchunless.patch!(iseq) visit(node.statements) else @@ -1386,11 +1383,11 @@ def visit_unless(node) if node.consequent jump = iseq.jump(-1) - branchunless[1] = iseq.label + branchunless.patch!(iseq) visit(node.consequent) jump[1] = iseq.label else - branchunless[1] = iseq.label + branchunless.patch!(iseq) end end end @@ -1598,24 +1595,24 @@ def opassign_defined(node) name = node.target.constant.value.to_sym iseq.dup - iseq.defined(YARV::DEFINED_CONST_FROM, name, true) + iseq.defined(YARV::Defined::CONST_FROM, name, true) when TopConstField name = node.target.constant.value.to_sym iseq.putobject(Object) iseq.dup - iseq.defined(YARV::DEFINED_CONST_FROM, name, true) + iseq.defined(YARV::Defined::CONST_FROM, name, true) when VarField name = node.target.value.value.to_sym iseq.putnil case node.target.value when Const - iseq.defined(YARV::DEFINED_CONST, name, true) + iseq.defined(YARV::Defined::CONST, name, true) when CVar - 
iseq.defined(YARV::DEFINED_CVAR, name, true) + iseq.defined(YARV::Defined::CVAR, name, true) when GVar - iseq.defined(YARV::DEFINED_GVAR, name, true) + iseq.defined(YARV::Defined::GVAR, name, true) end end @@ -1641,7 +1638,7 @@ def opassign_defined(node) branchif = iseq.branchif(-1) iseq.pop - branchunless[1] = iseq.label + branchunless.patch!(iseq) visit(node.value) case node.target @@ -1663,7 +1660,7 @@ def opassign_defined(node) end end - branchif[1] = iseq.label + branchif.patch!(iseq) end # Whenever a value is interpolated into a string-like structure, these diff --git a/lib/syntax_tree/dsl.rb b/lib/syntax_tree/dsl.rb index 1d1324df..860a1fe5 100644 --- a/lib/syntax_tree/dsl.rb +++ b/lib/syntax_tree/dsl.rb @@ -1,133 +1,1004 @@ # frozen_string_literal: true module SyntaxTree + # This module provides shortcuts for creating AST nodes. module DSL + # Create a new BEGINBlock node. + def BEGINBlock(lbrace, statements) + BEGINBlock.new( + lbrace: lbrace, + statements: statements, + location: Location.default + ) + end + + # Create a new CHAR node. + def CHAR(value) + CHAR.new(value: value, location: Location.default) + end + + # Create a new ENDBlock node. + def ENDBlock(lbrace, statements) + ENDBlock.new( + lbrace: lbrace, + statements: statements, + location: Location.default + ) + end + + # Create a new EndContent node. + def EndContent(value) + EndContent.new(value: value, location: Location.default) + end + + # Create a new AliasNode node. + def AliasNode(left, right) + AliasNode.new(left: left, right: right, location: Location.default) + end + + # Create a new ARef node. def ARef(collection, index) ARef.new(collection: collection, index: index, location: Location.default) end + # Create a new ARefField node. def ARefField(collection, index) - ARefField.new(collection: collection, index: index, location: Location.default) + ARefField.new( + collection: collection, + index: index, + location: Location.default + ) end + # Create a new ArgParen node. 
+ def ArgParen(arguments) + ArgParen.new(arguments: arguments, location: Location.default) + end + + # Create a new Args node. def Args(parts) Args.new(parts: parts, location: Location.default) end - def ArgParen(arguments) - ArgParen.new(arguments: arguments, location: Location.default) + # Create a new ArgBlock node. + def ArgBlock(value) + ArgBlock.new(value: value, location: Location.default) + end + + # Create a new ArgStar node. + def ArgStar(value) + ArgStar.new(value: value, location: Location.default) + end + + # Create a new ArgsForward node. + def ArgsForward + ArgsForward.new(location: Location.default) + end + + # Create a new ArrayLiteral node. + def ArrayLiteral(lbracket, contents) + ArrayLiteral.new( + lbracket: lbracket, + contents: contents, + location: Location.default + ) end + # Create a new AryPtn node. + def AryPtn(constant, requireds, rest, posts) + AryPtn.new( + constant: constant, + requireds: requireds, + rest: rest, + posts: posts, + location: Location.default + ) + end + + # Create a new Assign node. def Assign(target, value) Assign.new(target: target, value: value, location: Location.default) end + # Create a new Assoc node. def Assoc(key, value) Assoc.new(key: key, value: value, location: Location.default) end - def Binary(left, operator, right) - Binary.new(left: left, operator: operator, right: right, location: Location.default) + # Create a new AssocSplat node. + def AssocSplat(value) + AssocSplat.new(value: value, location: Location.default) end - def BlockNode(opening, block_var, bodystmt) - BlockNode.new(opening: opening, block_var: block_var, bodystmt: bodystmt, location: Location.default) + # Create a new Backref node. + def Backref(value) + Backref.new(value: value, location: Location.default) + end + + # Create a new Backtick node. + def Backtick(value) + Backtick.new(value: value, location: Location.default) + end + + # Create a new BareAssocHash node. 
+ def BareAssocHash(assocs) + BareAssocHash.new(assocs: assocs, location: Location.default) end - def BodyStmt(statements, rescue_clause, else_keyword, else_clause, ensure_clause) - BodyStmt.new(statements: statements, rescue_clause: rescue_clause, else_keyword: else_keyword, else_clause: else_clause, ensure_clause: ensure_clause, location: Location.default) + # Create a new Begin node. + def Begin(bodystmt) + Begin.new(bodystmt: bodystmt, location: Location.default) end + # Create a new PinnedBegin node. + def PinnedBegin(statement) + PinnedBegin.new(statement: statement, location: Location.default) + end + + # Create a new Binary node. + def Binary(left, operator, right) + Binary.new( + left: left, + operator: operator, + right: right, + location: Location.default + ) + end + + # Create a new BlockVar node. + def BlockVar(params, locals) + BlockVar.new(params: params, locals: locals, location: Location.default) + end + + # Create a new BlockArg node. + def BlockArg(name) + BlockArg.new(name: name, location: Location.default) + end + + # Create a new BodyStmt node. + def BodyStmt( + statements, + rescue_clause, + else_keyword, + else_clause, + ensure_clause + ) + BodyStmt.new( + statements: statements, + rescue_clause: rescue_clause, + else_keyword: else_keyword, + else_clause: else_clause, + ensure_clause: ensure_clause, + location: Location.default + ) + end + + # Create a new Break node. def Break(arguments) Break.new(arguments: arguments, location: Location.default) end + # Create a new CallNode node. def CallNode(receiver, operator, message, arguments) - CallNode.new(receiver: receiver, operator: operator, message: message, arguments: arguments, location: Location.default) + CallNode.new( + receiver: receiver, + operator: operator, + message: message, + arguments: arguments, + location: Location.default + ) end + # Create a new Case node. 
def Case(keyword, value, consequent) - Case.new(keyword: keyword, value: value, consequent: consequent, location: Location.default) + Case.new( + keyword: keyword, + value: value, + consequent: consequent, + location: Location.default + ) + end + + # Create a new RAssign node. + def RAssign(value, operator, pattern) + RAssign.new( + value: value, + operator: operator, + pattern: pattern, + location: Location.default + ) + end + + # Create a new ClassDeclaration node. + def ClassDeclaration(constant, superclass, bodystmt) + ClassDeclaration.new( + constant: constant, + superclass: superclass, + bodystmt: bodystmt, + location: Location.default + ) + end + + # Create a new Comma node. + def Comma(value) + Comma.new(value: value, location: Location.default) end + # Create a new Command node. + def Command(message, arguments, block) + Command.new( + message: message, + arguments: arguments, + block: block, + location: Location.default + ) + end + + # Create a new CommandCall node. + def CommandCall(receiver, operator, message, arguments, block) + CommandCall.new( + receiver: receiver, + operator: operator, + message: message, + arguments: arguments, + block: block, + location: Location.default + ) + end + + # Create a new Comment node. + def Comment(value, inline) + Comment.new(value: value, inline: inline, location: Location.default) + end + + # Create a new Const node. + def Const(value) + Const.new(value: value, location: Location.default) + end + + # Create a new ConstPathField node. + def ConstPathField(parent, constant) + ConstPathField.new( + parent: parent, + constant: constant, + location: Location.default + ) + end + + # Create a new ConstPathRef node. + def ConstPathRef(parent, constant) + ConstPathRef.new( + parent: parent, + constant: constant, + location: Location.default + ) + end + + # Create a new ConstRef node. + def ConstRef(constant) + ConstRef.new(constant: constant, location: Location.default) + end + + # Create a new CVar node. 
+ def CVar(value) + CVar.new(value: value, location: Location.default) + end + + # Create a new DefNode node. + def DefNode(target, operator, name, params, bodystmt) + DefNode.new( + target: target, + operator: operator, + name: name, + params: params, + bodystmt: bodystmt, + location: Location.default + ) + end + + # Create a new Defined node. + def Defined(value) + Defined.new(value: value, location: Location.default) + end + + # Create a new BlockNode node. + def BlockNode(opening, block_var, bodystmt) + BlockNode.new( + opening: opening, + block_var: block_var, + bodystmt: bodystmt, + location: Location.default + ) + end + + # Create a new RangeNode node. + def RangeNode(left, operator, right) + RangeNode.new( + left: left, + operator: operator, + right: right, + location: Location.default + ) + end + + # Create a new DynaSymbol node. + def DynaSymbol(parts, quote) + DynaSymbol.new(parts: parts, quote: quote, location: Location.default) + end + + # Create a new Else node. + def Else(keyword, statements) + Else.new( + keyword: keyword, + statements: statements, + location: Location.default + ) + end + + # Create a new Elsif node. + def Elsif(predicate, statements, consequent) + Elsif.new( + predicate: predicate, + statements: statements, + consequent: consequent, + location: Location.default + ) + end + + # Create a new EmbDoc node. + def EmbDoc(value) + EmbDoc.new(value: value, location: Location.default) + end + + # Create a new EmbExprBeg node. + def EmbExprBeg(value) + EmbExprBeg.new(value: value, location: Location.default) + end + + # Create a new EmbExprEnd node. + def EmbExprEnd(value) + EmbExprEnd.new(value: value, location: Location.default) + end + + # Create a new EmbVar node. + def EmbVar(value) + EmbVar.new(value: value, location: Location.default) + end + + # Create a new Ensure node. 
+ def Ensure(keyword, statements) + Ensure.new( + keyword: keyword, + statements: statements, + location: Location.default + ) + end + + # Create a new ExcessedComma node. + def ExcessedComma(value) + ExcessedComma.new(value: value, location: Location.default) + end + + # Create a new Field node. + def Field(parent, operator, name) + Field.new( + parent: parent, + operator: operator, + name: name, + location: Location.default + ) + end + + # Create a new FloatLiteral node. def FloatLiteral(value) FloatLiteral.new(value: value, location: Location.default) end + # Create a new FndPtn node. + def FndPtn(constant, left, values, right) + FndPtn.new( + constant: constant, + left: left, + values: values, + right: right, + location: Location.default + ) + end + + # Create a new For node. + def For(index, collection, statements) + For.new( + index: index, + collection: collection, + statements: statements, + location: Location.default + ) + end + + # Create a new GVar node. def GVar(value) GVar.new(value: value, location: Location.default) end + # Create a new HashLiteral node. def HashLiteral(lbrace, assocs) - HashLiteral.new(lbrace: lbrace, assocs: assocs, location: Location.default) + HashLiteral.new( + lbrace: lbrace, + assocs: assocs, + location: Location.default + ) + end + + # Create a new Heredoc node. + def Heredoc(beginning, ending, dedent, parts) + Heredoc.new( + beginning: beginning, + ending: ending, + dedent: dedent, + parts: parts, + location: Location.default + ) + end + + # Create a new HeredocBeg node. + def HeredocBeg(value) + HeredocBeg.new(value: value, location: Location.default) + end + + # Create a new HeredocEnd node. + def HeredocEnd(value) + HeredocEnd.new(value: value, location: Location.default) + end + + # Create a new HshPtn node. + def HshPtn(constant, keywords, keyword_rest) + HshPtn.new( + constant: constant, + keywords: keywords, + keyword_rest: keyword_rest, + location: Location.default + ) end + # Create a new Ident node. 
def Ident(value) Ident.new(value: value, location: Location.default) end + # Create a new IfNode node. def IfNode(predicate, statements, consequent) - IfNode.new(predicate: predicate, statements: statements, consequent: consequent, location: Location.default) + IfNode.new( + predicate: predicate, + statements: statements, + consequent: consequent, + location: Location.default + ) end + # Create a new IfOp node. + def IfOp(predicate, truthy, falsy) + IfOp.new( + predicate: predicate, + truthy: truthy, + falsy: falsy, + location: Location.default + ) + end + + # Create a new Imaginary node. + def Imaginary(value) + Imaginary.new(value: value, location: Location.default) + end + + # Create a new In node. + def In(pattern, statements, consequent) + In.new( + pattern: pattern, + statements: statements, + consequent: consequent, + location: Location.default + ) + end + + # Create a new Int node. def Int(value) Int.new(value: value, location: Location.default) end + # Create a new IVar node. + def IVar(value) + IVar.new(value: value, location: Location.default) + end + + # Create a new Kw node. def Kw(value) Kw.new(value: value, location: Location.default) end + # Create a new KwRestParam node. + def KwRestParam(name) + KwRestParam.new(name: name, location: Location.default) + end + + # Create a new Label node. + def Label(value) + Label.new(value: value, location: Location.default) + end + + # Create a new LabelEnd node. + def LabelEnd(value) + LabelEnd.new(value: value, location: Location.default) + end + + # Create a new Lambda node. + def Lambda(params, statements) + Lambda.new( + params: params, + statements: statements, + location: Location.default + ) + end + + # Create a new LambdaVar node. + def LambdaVar(params, locals) + LambdaVar.new(params: params, locals: locals, location: Location.default) + end + + # Create a new LBrace node. def LBrace(value) LBrace.new(value: value, location: Location.default) end + # Create a new LBracket node. 
+ def LBracket(value) + LBracket.new(value: value, location: Location.default) + end + + # Create a new LParen node. + def LParen(value) + LParen.new(value: value, location: Location.default) + end + + # Create a new MAssign node. + def MAssign(target, value) + MAssign.new(target: target, value: value, location: Location.default) + end + + # Create a new MethodAddBlock node. def MethodAddBlock(call, block) MethodAddBlock.new(call: call, block: block, location: Location.default) end + # Create a new MLHS node. + def MLHS(parts, comma) + MLHS.new(parts: parts, comma: comma, location: Location.default) + end + + # Create a new MLHSParen node. + def MLHSParen(contents, comma) + MLHSParen.new( + contents: contents, + comma: comma, + location: Location.default + ) + end + + # Create a new ModuleDeclaration node. + def ModuleDeclaration(constant, bodystmt) + ModuleDeclaration.new( + constant: constant, + bodystmt: bodystmt, + location: Location.default + ) + end + + # Create a new MRHS node. + def MRHS(parts) + MRHS.new(parts: parts, location: Location.default) + end + + # Create a new Next node. def Next(arguments) Next.new(arguments: arguments, location: Location.default) end + # Create a new Op node. def Op(value) Op.new(value: value, location: Location.default) end + # Create a new OpAssign node. def OpAssign(target, operator, value) - OpAssign.new(target: target, operator: operator, value: value, location: Location.default) - end - + OpAssign.new( + target: target, + operator: operator, + value: value, + location: Location.default + ) + end + + # Create a new Params node. + def Params(requireds, optionals, rest, posts, keywords, keyword_rest, block) + Params.new( + requireds: requireds, + optionals: optionals, + rest: rest, + posts: posts, + keywords: keywords, + keyword_rest: keyword_rest, + block: block, + location: Location.default + ) + end + + # Create a new Paren node. 
+ def Paren(lparen, contents) + Paren.new(lparen: lparen, contents: contents, location: Location.default) + end + + # Create a new Period node. def Period(value) Period.new(value: value, location: Location.default) end + # Create a new Program node. def Program(statements) Program.new(statements: statements, location: Location.default) end + # Create a new QSymbols node. + def QSymbols(beginning, elements) + QSymbols.new( + beginning: beginning, + elements: elements, + location: Location.default + ) + end + + # Create a new QSymbolsBeg node. + def QSymbolsBeg(value) + QSymbolsBeg.new(value: value, location: Location.default) + end + + # Create a new QWords node. + def QWords(beginning, elements) + QWords.new( + beginning: beginning, + elements: elements, + location: Location.default + ) + end + + # Create a new QWordsBeg node. + def QWordsBeg(value) + QWordsBeg.new(value: value, location: Location.default) + end + + # Create a new RationalLiteral node. + def RationalLiteral(value) + RationalLiteral.new(value: value, location: Location.default) + end + + # Create a new RBrace node. + def RBrace(value) + RBrace.new(value: value, location: Location.default) + end + + # Create a new RBracket node. + def RBracket(value) + RBracket.new(value: value, location: Location.default) + end + + # Create a new Redo node. + def Redo + Redo.new(location: Location.default) + end + + # Create a new RegexpContent node. + def RegexpContent(beginning, parts) + RegexpContent.new( + beginning: beginning, + parts: parts, + location: Location.default + ) + end + + # Create a new RegexpBeg node. + def RegexpBeg(value) + RegexpBeg.new(value: value, location: Location.default) + end + + # Create a new RegexpEnd node. + def RegexpEnd(value) + RegexpEnd.new(value: value, location: Location.default) + end + + # Create a new RegexpLiteral node. 
+ def RegexpLiteral(beginning, ending, parts) + RegexpLiteral.new( + beginning: beginning, + ending: ending, + parts: parts, + location: Location.default + ) + end + + # Create a new RescueEx node. + def RescueEx(exceptions, variable) + RescueEx.new( + exceptions: exceptions, + variable: variable, + location: Location.default + ) + end + + # Create a new Rescue node. + def Rescue(keyword, exception, statements, consequent) + Rescue.new( + keyword: keyword, + exception: exception, + statements: statements, + consequent: consequent, + location: Location.default + ) + end + + # Create a new RescueMod node. + def RescueMod(statement, value) + RescueMod.new( + statement: statement, + value: value, + location: Location.default + ) + end + + # Create a new RestParam node. + def RestParam(name) + RestParam.new(name: name, location: Location.default) + end + + # Create a new Retry node. + def Retry + Retry.new(location: Location.default) + end + + # Create a new ReturnNode node. def ReturnNode(arguments) ReturnNode.new(arguments: arguments, location: Location.default) end + # Create a new RParen node. + def RParen(value) + RParen.new(value: value, location: Location.default) + end + + # Create a new SClass node. + def SClass(target, bodystmt) + SClass.new(target: target, bodystmt: bodystmt, location: Location.default) + end + + # Create a new Statements node. def Statements(body) Statements.new(nil, body: body, location: Location.default) end + # Create a new StringContent node. + def StringContent(parts) + StringContent.new(parts: parts, location: Location.default) + end + + # Create a new StringConcat node. + def StringConcat(left, right) + StringConcat.new(left: left, right: right, location: Location.default) + end + + # Create a new StringDVar node. + def StringDVar(variable) + StringDVar.new(variable: variable, location: Location.default) + end + + # Create a new StringEmbExpr node. 
+ def StringEmbExpr(statements) + StringEmbExpr.new(statements: statements, location: Location.default) + end + + # Create a new StringLiteral node. + def StringLiteral(parts, quote) + StringLiteral.new(parts: parts, quote: quote, location: Location.default) + end + + # Create a new Super node. + def Super(arguments) + Super.new(arguments: arguments, location: Location.default) + end + + # Create a new SymBeg node. + def SymBeg(value) + SymBeg.new(value: value, location: Location.default) + end + + # Create a new SymbolContent node. + def SymbolContent(value) + SymbolContent.new(value: value, location: Location.default) + end + + # Create a new SymbolLiteral node. def SymbolLiteral(value) SymbolLiteral.new(value: value, location: Location.default) end + # Create a new Symbols node. + def Symbols(beginning, elements) + Symbols.new( + beginning: beginning, + elements: elements, + location: Location.default + ) + end + + # Create a new SymbolsBeg node. + def SymbolsBeg(value) + SymbolsBeg.new(value: value, location: Location.default) + end + + # Create a new TLambda node. + def TLambda(value) + TLambda.new(value: value, location: Location.default) + end + + # Create a new TLamBeg node. + def TLamBeg(value) + TLamBeg.new(value: value, location: Location.default) + end + + # Create a new TopConstField node. + def TopConstField(constant) + TopConstField.new(constant: constant, location: Location.default) + end + + # Create a new TopConstRef node. + def TopConstRef(constant) + TopConstRef.new(constant: constant, location: Location.default) + end + + # Create a new TStringBeg node. + def TStringBeg(value) + TStringBeg.new(value: value, location: Location.default) + end + + # Create a new TStringContent node. + def TStringContent(value) + TStringContent.new(value: value, location: Location.default) + end + + # Create a new TStringEnd node. + def TStringEnd(value) + TStringEnd.new(value: value, location: Location.default) + end + + # Create a new Not node. 
+ def Not(statement, parentheses) + Not.new( + statement: statement, + parentheses: parentheses, + location: Location.default + ) + end + + # Create a new Unary node. + def Unary(operator, statement) + Unary.new( + operator: operator, + statement: statement, + location: Location.default + ) + end + + # Create a new Undef node. + def Undef(symbols) + Undef.new(symbols: symbols, location: Location.default) + end + + # Create a new UnlessNode node. + def UnlessNode(predicate, statements, consequent) + UnlessNode.new( + predicate: predicate, + statements: statements, + consequent: consequent, + location: Location.default + ) + end + + # Create a new UntilNode node. + def UntilNode(predicate, statements) + UntilNode.new( + predicate: predicate, + statements: statements, + location: Location.default + ) + end + + # Create a new VarField node. def VarField(value) VarField.new(value: value, location: Location.default) end + # Create a new VarRef node. def VarRef(value) VarRef.new(value: value, location: Location.default) end + # Create a new PinnedVarRef node. + def PinnedVarRef(value) + PinnedVarRef.new(value: value, location: Location.default) + end + + # Create a new VCall node. + def VCall(value) + VCall.new(value: value, location: Location.default) + end + + # Create a new VoidStmt node. + def VoidStmt + VoidStmt.new(location: Location.default) + end + + # Create a new When node. def When(arguments, statements, consequent) - When.new(arguments: arguments, statements: statements, consequent: consequent, location: Location.default) + When.new( + arguments: arguments, + statements: statements, + consequent: consequent, + location: Location.default + ) + end + + # Create a new WhileNode node. + def WhileNode(predicate, statements) + WhileNode.new( + predicate: predicate, + statements: statements, + location: Location.default + ) + end + + # Create a new Word node. + def Word(parts) + Word.new(parts: parts, location: Location.default) + end + + # Create a new Words node. 
+ def Words(beginning, elements) + Words.new( + beginning: beginning, + elements: elements, + location: Location.default + ) + end + + # Create a new WordsBeg node. + def WordsBeg(value) + WordsBeg.new(value: value, location: Location.default) + end + + # Create a new XString node. + def XString(parts) + XString.new(parts: parts, location: Location.default) + end + + # Create a new XStringLiteral node. + def XStringLiteral(parts) + XStringLiteral.new(parts: parts, location: Location.default) + end + + # Create a new YieldNode node. + def YieldNode(arguments) + YieldNode.new(arguments: arguments, location: Location.default) + end + + # Create a new ZSuper node. + def ZSuper + ZSuper.new(location: Location.default) end end end diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index 822844fb..a29714a5 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -206,7 +206,12 @@ def inline_storage_for(name) def length insns.inject(0) do |sum, insn| - insn.is_a?(Array) ? sum + insn.length : sum + case insn + when Integer, Symbol + sum + else + sum + insn.length + end end end @@ -241,7 +246,38 @@ def to_a local_table.names, argument_options, [], - insns.map { |insn| serialize(insn) } + insns.map do |insn| + case insn + when Integer, Symbol + insn + when Array + case insn[0] + when :setlocal_WC_0, :setlocal_WC_1, :setlocal + iseq = self + + case insn[0] + when :setlocal_WC_1 + iseq = iseq.parent_iseq + when :setlocal + insn[2].times { iseq = iseq.parent_iseq } + end + + # Here we need to map the local variable index to the offset + # from the top of the stack where it will be stored. + [insn[0], iseq.local_table.offset(insn[1]), *insn[2..]] + when :send + # For any instructions that push instruction sequences onto the + # stack, we need to call #to_a on them as well. 
+ [insn[0], insn[1], (insn[2].to_a if insn[2])] + when :once + [insn[0], insn[1].to_a, insn[2]] + else + insn + end + else + insn.to_a(self) + end + end ] end @@ -289,7 +325,14 @@ def singleton_class_child_iseq(location) def push(insn) insns << insn - insn + + case insn + when Integer, Symbol, Array + insn + else + stack.change_by(-insn.pops + insn.pushes) + insn + end end # This creates a new label at the current length of the instruction @@ -304,134 +347,106 @@ def event(name) end def adjuststack(number) - stack.change_by(-number) - push([:adjuststack, number]) + push(AdjustStack.new(number)) end def anytostring - stack.change_by(-2 + 1) - push([:anytostring]) + push(AnyToString.new) end - def branchif(index) - stack.change_by(-1) - push([:branchif, index]) + def branchif(label) + push(BranchIf.new(label)) end - def branchnil(index) - stack.change_by(-1) - push([:branchnil, index]) + def branchnil(label) + push(BranchNil.new(label)) end - def branchunless(index) - stack.change_by(-1) - push([:branchunless, index]) + def branchunless(label) + push(BranchUnless.new(label)) end - def checkkeyword(index, keyword_index) - stack.change_by(+1) - push([:checkkeyword, index, keyword_index]) + def checkkeyword(keyword_bits_index, keyword_index) + push(CheckKeyword.new(keyword_bits_index, keyword_index)) end def concatarray - stack.change_by(-2 + 1) - push([:concatarray]) + push(ConcatArray.new) end def concatstrings(number) - stack.change_by(-number + 1) - push([:concatstrings, number]) + push(ConcatStrings.new(number)) end def defined(type, name, message) - stack.change_by(-1 + 1) - push([:defined, type, name, message]) + push(Defined.new(type, name, message)) end def defineclass(name, class_iseq, flags) - stack.change_by(-2 + 1) - push([:defineclass, name, class_iseq, flags]) + push(DefineClass.new(name, class_iseq, flags)) end def definemethod(name, method_iseq) - stack.change_by(0) - push([:definemethod, name, method_iseq]) + push(DefineMethod.new(name, method_iseq)) 
end def definesmethod(name, method_iseq) - stack.change_by(-1) - push([:definesmethod, name, method_iseq]) + push(DefineSMethod.new(name, method_iseq)) end def dup - stack.change_by(-1 + 2) - push([:dup]) + push(Dup.new) end def duparray(object) - stack.change_by(+1) - push([:duparray, object]) + push(DupArray.new(object)) end def duphash(object) - stack.change_by(+1) - push([:duphash, object]) + push(DupHash.new(object)) end def dupn(number) - stack.change_by(+number) - push([:dupn, number]) + push(DupN.new(number)) end - def expandarray(length, flag) - stack.change_by(-1 + length) - push([:expandarray, length, flag]) + def expandarray(length, flags) + push(ExpandArray.new(length, flags)) end def getblockparam(index, level) - stack.change_by(+1) - push([:getblockparam, index, level]) + push(GetBlockParam.new(index, level)) end def getblockparamproxy(index, level) - stack.change_by(+1) - push([:getblockparamproxy, index, level]) + push(GetBlockParamProxy.new(index, level)) end def getclassvariable(name) - stack.change_by(+1) - - if RUBY_VERSION >= "3.0" - push([:getclassvariable, name, inline_storage_for(name)]) + if RUBY_VERSION < "3.0" + push(GetClassVariableUncached.new(name)) else - push([:getclassvariable, name]) + push(GetClassVariable.new(name, inline_storage_for(name))) end end def getconstant(name) - stack.change_by(-2 + 1) - push([:getconstant, name]) + push(GetConstant.new(name)) end def getglobal(name) - stack.change_by(+1) - push([:getglobal, name]) + push(GetGlobal.new(name)) end def getinstancevariable(name) - stack.change_by(+1) - - if RUBY_VERSION >= "3.2" - push([:getinstancevariable, name, inline_storage]) + if RUBY_VERSION < "3.2" + push(GetInstanceVariable.new(name, inline_storage_for(name))) else - inline_storage = inline_storage_for(name) - push([:getinstancevariable, name, inline_storage]) + push(GetInstanceVariable.new(name, inline_storage)) end end def getlocal(index, level) - stack.change_by(+1) - if operands_unification # Specialize the 
getlocal instruction based on the level of the # local variable. If it's 0 or 1, then there's a specialized @@ -439,14 +454,14 @@ def getlocal(index, level) # scope, respectively, and requires fewer operands. case level when 0 - push([:getlocal_WC_0, index]) + push(GetLocalWC0.new(index)) when 1 - push([:getlocal_WC_1, index]) + push(GetLocalWC1.new(index)) else - push([:getlocal, index, level]) + push(GetLocal.new(index, level)) end else - push([:getlocal, index, level]) + push(GetLocal.new(index, level)) end end @@ -762,38 +777,6 @@ def toregexp(options, length) def call_data(method_id, argc, flag = VM_CALL_ARGS_SIMPLE) { mid: method_id, flag: flag, orig_argc: argc } end - - def serialize(insn) - case insn[0] - when :checkkeyword, :getblockparam, :getblockparamproxy, :getlocal_WC_0, - :getlocal_WC_1, :getlocal, :setlocal_WC_0, :setlocal_WC_1, - :setlocal - iseq = self - - case insn[0] - when :getlocal_WC_1, :setlocal_WC_1 - iseq = iseq.parent_iseq - when :getblockparam, :getblockparamproxy, :getlocal, :setlocal - insn[2].times { iseq = iseq.parent_iseq } - end - - # Here we need to map the local variable index to the offset - # from the top of the stack where it will be stored. - [insn[0], iseq.local_table.offset(insn[1]), *insn[2..]] - when :defineclass - [insn[0], insn[1], insn[2].to_a, insn[3]] - when :definemethod, :definesmethod - [insn[0], insn[1], insn[2].to_a] - when :send - # For any instructions that push instruction sequences onto the - # stack, we need to call #to_a on them as well. - [insn[0], insn[1], (insn[2].to_a if insn[2])] - when :once - [insn[0], insn[1].to_a, insn[2]] - else - insn - end - end end # These constants correspond to the putspecialobject instruction. They are @@ -819,34 +802,5 @@ def serialize(insn) VM_CALL_ZSUPER = 1 << 10 VM_CALL_OPT_SEND = 1 << 11 VM_CALL_KW_SPLAT_MUT = 1 << 12 - - # These constants correspond to the value passed as part of the defined - # instruction. 
It's an enum defined in the CRuby codebase that tells that - # instruction what kind of defined check to perform. - DEFINED_NIL = 1 - DEFINED_IVAR = 2 - DEFINED_LVAR = 3 - DEFINED_GVAR = 4 - DEFINED_CVAR = 5 - DEFINED_CONST = 6 - DEFINED_METHOD = 7 - DEFINED_YIELD = 8 - DEFINED_ZSUPER = 9 - DEFINED_SELF = 10 - DEFINED_TRUE = 11 - DEFINED_FALSE = 12 - DEFINED_ASGN = 13 - DEFINED_EXPR = 14 - DEFINED_REF = 15 - DEFINED_FUNC = 16 - DEFINED_CONST_FROM = 17 - - # These constants correspond to the value passed in the flags as part of - # the defineclass instruction. - VM_DEFINECLASS_TYPE_CLASS = 0 - VM_DEFINECLASS_TYPE_SINGLETON_CLASS = 1 - VM_DEFINECLASS_TYPE_MODULE = 2 - VM_DEFINECLASS_FLAG_SCOPED = 8 - VM_DEFINECLASS_FLAG_HAS_SUPERCLASS = 16 end end diff --git a/lib/syntax_tree/yarv/bf.rb b/lib/syntax_tree/yarv/bf.rb index 16098190..05c05705 100644 --- a/lib/syntax_tree/yarv/bf.rb +++ b/lib/syntax_tree/yarv/bf.rb @@ -5,460 +5,171 @@ module YARV # Parses the given source code into a syntax tree, compiles that syntax tree # into YARV bytecode. class Bf - class Node - def format(q) - Format.new(q).visit(self) - end - - def pretty_print(q) - PrettyPrint.new(q).visit(self) - end - end - - # The root node of the syntax tree. - class Root < Node - attr_reader :nodes, :location - - def initialize(nodes:, location:) - @nodes = nodes - @location = location - end - - def accept(visitor) - visitor.visit_root(self) - end - - def child_nodes - nodes - end - - alias deconstruct child_nodes - - def deconstruct_keys(keys) - { nodes: nodes, location: location } - end - end - - # [ ... 
] - class Loop < Node - attr_reader :nodes, :location - - def initialize(nodes:, location:) - @nodes = nodes - @location = location - end - - def accept(visitor) - visitor.visit_loop(self) - end - - def child_nodes - nodes - end - - alias deconstruct child_nodes - - def deconstruct_keys(keys) - { nodes: nodes, location: location } - end - end - - # + - class Increment < Node - attr_reader :location - - def initialize(location:) - @location = location - end - - def accept(visitor) - visitor.visit_increment(self) - end - - def child_nodes - [] - end - - alias deconstruct child_nodes + attr_reader :source - def deconstruct_keys(keys) - { value: "+", location: location } - end + def initialize(source) + @source = source end - # - - class Decrement < Node - attr_reader :location - - def initialize(location:) - @location = location - end - - def accept(visitor) - visitor.visit_decrement(self) - end - - def child_nodes - [] - end - - alias deconstruct child_nodes + def compile + # Set up the top-level instruction sequence that will be returned. + iseq = InstructionSequence.new(:top, "", nil, location) + + # Set up the $tape global variable that will hold our state. + iseq.duphash({ 0 => 0 }) + iseq.setglobal(:$tape) + iseq.getglobal(:$tape) + iseq.putobject(0) + iseq.send(:default=, 1) + + # Set up the $cursor global variable that will hold the current position + # in the tape. + iseq.putobject(0) + iseq.setglobal(:$cursor) + + stack = [] + source + .each_char + .chunk do |char| + # For each character, we're going to assign a type to it. This + # allows a couple of optimizations to be made by combining multiple + # instructions into single instructions, e.g., +++ becomes a single + # change_by(3) instruction. + case char + when "+", "-" + :change + when ">", "<" + :shift + when "." + :output + when "," + :input + when "[", "]" + :loop + else + :ignored + end + end + .each do |type, chunk| + # For each chunk, we're going to emit the appropriate instruction. 
+ case type + when :change + change_by(iseq, chunk.count("+") - chunk.count("-")) + when :shift + shift_by(iseq, chunk.count(">") - chunk.count("<")) + when :output + chunk.length.times { output_char(iseq) } + when :input + chunk.length.times { input_char(iseq) } + when :loop + chunk.each do |char| + case char + when "[" + stack << loop_start(iseq) + when "]" + loop_end(iseq, *stack.pop) + end + end + end + end - def deconstruct_keys(keys) - { value: "-", location: location } - end + iseq.leave + iseq end - # > - class ShiftRight < Node - attr_reader :location - - def initialize(location:) - @location = location - end - - def accept(visitor) - visitor.visit_shift_right(self) - end - - def child_nodes - [] - end - - alias deconstruct child_nodes + private - def deconstruct_keys(keys) - { value: ">", location: location } - end + # This is the location of the top instruction sequence, derived from the + # source string. + def location + Location.new( + start_line: 1, + start_char: 0, + start_column: 0, + end_line: source.count("\n") + 1, + end_char: source.size, + end_column: source.size - (source.rindex("\n") || 0) - 1 + ) end - # < - class ShiftLeft < Node - attr_reader :location - - def initialize(location:) - @location = location - end + # $tape[$cursor] += value + def change_by(iseq, value) + iseq.getglobal(:$tape) + iseq.getglobal(:$cursor) - def accept(visitor) - visitor.visit_shift_left(self) - end + iseq.getglobal(:$tape) + iseq.getglobal(:$cursor) + iseq.send(:[], 1) - def child_nodes - [] + if value < 0 + iseq.putobject(-value) + iseq.send(:-, 1) + else + iseq.putobject(value) + iseq.send(:+, 1) end - alias deconstruct child_nodes - - def deconstruct_keys(keys) - { value: "<", location: location } - end + iseq.send(:[]=, 2) end - # , - class Input < Node - attr_reader :location - - def initialize(location:) - @location = location - end + # $cursor += value + def shift_by(iseq, value) + iseq.getglobal(:$cursor) - def accept(visitor) - 
visitor.visit_input(self) + if value < 0 + iseq.putobject(-value) + iseq.send(:-, 1) + else + iseq.putobject(value) + iseq.send(:+, 1) end - def child_nodes - [] - end - - alias deconstruct child_nodes - - def deconstruct_keys(keys) - { value: ",", location: location } - end + iseq.setglobal(:$cursor) end - # . - class Output < Node - attr_reader :location + # $stdout.putc($tape[$cursor].chr) + def output_char(iseq) + iseq.getglobal(:$stdout) - def initialize(location:) - @location = location - end - - def accept(visitor) - visitor.visit_output(self) - end - - def child_nodes - [] - end - - alias deconstruct child_nodes - - def deconstruct_keys(keys) - { value: ".", location: location } - end - end + iseq.getglobal(:$tape) + iseq.getglobal(:$cursor) + iseq.send(:[], 1) + iseq.send(:chr, 0) - # Allows visiting the syntax tree recursively. - class Visitor - def visit(node) - node.accept(self) - end - - def visit_all(nodes) - nodes.map { |node| visit(node) } - end - - def visit_child_nodes(node) - visit_all(node.child_nodes) - end - - # Visit a Root node. - alias visit_root visit_child_nodes - - # Visit a Loop node. - alias visit_loop visit_child_nodes - - # Visit an Increment node. - alias visit_increment visit_child_nodes - - # Visit a Decrement node. - alias visit_decrement visit_child_nodes - - # Visit a ShiftRight node. - alias visit_shift_right visit_child_nodes - - # Visit a ShiftLeft node. - alias visit_shift_left visit_child_nodes - - # Visit an Input node. - alias visit_input visit_child_nodes - - # Visit an Output node. - alias visit_output visit_child_nodes + iseq.send(:putc, 1) end - # Compiles the syntax tree into YARV bytecode. 
- class Compiler < Visitor - attr_reader :iseq - - def initialize - @iseq = InstructionSequence.new(:top, "", nil, Location.default) - end - - def visit_decrement(node) - change_by(-1) - end - - def visit_increment(node) - change_by(1) - end - - def visit_input(node) - iseq.getglobal(:$tape) - iseq.getglobal(:$cursor) - iseq.getglobal(:$stdin) - iseq.send(:getc, 0) - iseq.send(:ord, 0) - iseq.send(:[]=, 2) - end - - def visit_loop(node) - start_label = iseq.label - - # First, we're going to compare the value at the current cursor to 0. - # If it's 0, then we'll jump past the loop. Otherwise we'll execute - # the loop. - iseq.getglobal(:$tape) - iseq.getglobal(:$cursor) - iseq.send(:[], 1) - iseq.putobject(0) - iseq.send(:==, 1) - branchunless = iseq.branchunless(-1) - - # Otherwise, here we'll execute the loop. - visit_nodes(node.nodes) - - # Now that we've visited all of the child nodes, we need to jump back - # to the start of the loop. - iseq.jump(start_label) - - # Now that we have all of the instructions in place, we can patch the - # branchunless to point to the next instruction for skipping the loop. 
- branchunless[1] = iseq.label - end - - def visit_output(node) - iseq.getglobal(:$stdout) - iseq.getglobal(:$tape) - iseq.getglobal(:$cursor) - iseq.send(:[], 1) - iseq.send(:chr, 0) - iseq.send(:putc, 1) - end - - def visit_root(node) - iseq.duphash({ 0 => 0 }) - iseq.setglobal(:$tape) - iseq.getglobal(:$tape) - iseq.putobject(0) - iseq.send(:default=, 1) - - iseq.putobject(0) - iseq.setglobal(:$cursor) - - visit_nodes(node.nodes) - - iseq.leave - iseq - end - - def visit_shift_left(node) - shift_by(-1) - end - - def visit_shift_right(node) - shift_by(1) - end - - private - - def change_by(value) - iseq.getglobal(:$tape) - iseq.getglobal(:$cursor) - iseq.getglobal(:$tape) - iseq.getglobal(:$cursor) - iseq.send(:[], 1) - - if value < 0 - iseq.putobject(-value) - iseq.send(:-, 1) - else - iseq.putobject(value) - iseq.send(:+, 1) - end - - iseq.send(:[]=, 2) - end - - def shift_by(value) - iseq.getglobal(:$cursor) - - if value < 0 - iseq.putobject(-value) - iseq.send(:-, 1) - else - iseq.putobject(value) - iseq.send(:+, 1) - end + # $tape[$cursor] = $stdin.getc.ord + def input_char(iseq) + iseq.getglobal(:$tape) + iseq.getglobal(:$cursor) - iseq.setglobal(:$cursor) - end + iseq.getglobal(:$stdin) + iseq.send(:getc, 0) + iseq.send(:ord, 0) - def visit_nodes(nodes) - nodes - .chunk do |child| - case child - when Increment, Decrement - :change - when ShiftLeft, ShiftRight - :shift - else - :default - end - end - .each do |type, children| - case type - when :change - value = 0 - children.each { |child| value += child.is_a?(Increment) ? 1 : -1 } - change_by(value) - when :shift - value = 0 - children.each { |child| value += child.is_a?(ShiftRight) ? 
1 : -1 } - shift_by(value) - else - visit_all(children) - end - end - end + iseq.send(:[]=, 2) end - class Error < StandardError - end + # unless $tape[$cursor] == 0 + def loop_start(iseq) + start_label = iseq.label - attr_reader :source + iseq.getglobal(:$tape) + iseq.getglobal(:$cursor) + iseq.send(:[], 1) - def initialize(source) - @source = source - end + iseq.putobject(0) + iseq.send(:==, 1) - def compile - Root.new(nodes: parse_segment(source, 0), location: 0...source.length).accept(Compiler.new) + branchunless = iseq.branchunless(-1) + [start_label, branchunless] end - private - - def parse_segment(segment, offset) - index = 0 - nodes = [] - - while index < segment.length - location = offset + index - - case segment[index] - when "+" - nodes << Increment.new(location: location...(location + 1)) - index += 1 - when "-" - nodes << Decrement.new(location: location...(location + 1)) - index += 1 - when ">" - nodes << ShiftRight.new(location: location...(location + 1)) - index += 1 - when "<" - nodes << ShiftLeft.new(location: location...(location + 1)) - index += 1 - when "." - nodes << Output.new(location: location...(location + 1)) - index += 1 - when "," - nodes << Input.new(location: location...(location + 1)) - index += 1 - when "[" - matched = 1 - end_index = index + 1 - - while matched != 0 && end_index < segment.length - case segment[end_index] - when "[" - matched += 1 - when "]" - matched -= 1 - end - - end_index += 1 - end - - raise Error, "Unmatched start loop" if matched != 0 - - content = segment[(index + 1)...(end_index - 1)] - nodes << Loop.new( - nodes: parse_segment(content, offset + index + 1), - location: location...(offset + end_index) - ) - - index = end_index - when "]" - raise Error, "Unmatched end loop" - else - index += 1 - end - end - - nodes + # Jump back to the start of the loop. 
+ def loop_end(iseq, start_label, branchunless) + iseq.jump(start_label) + branchunless.patch!(iseq) end end end diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb index 566ed984..7a6e8893 100644 --- a/lib/syntax_tree/yarv/disassembler.rb +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -5,15 +5,33 @@ module YARV # This class is responsible for taking a compiled instruction sequence and # walking through it to generate equivalent Ruby code. class Disassembler + # When we're disassembling, we use a looped case statement to emulate + # jumping around in the same way the virtual machine would. This class + # provides convenience methods for generating the AST nodes that have to + # do with that label. + class DisasmLabel + include DSL + attr_reader :name + + def initialize(name) + @name = name + end + + def field + VarField(Ident(name)) + end + + def ref + VarRef(Ident(name)) + end + end + include DSL - attr_reader :iseq, :label_name, :label_field, :label_ref + attr_reader :iseq, :disasm_label def initialize(iseq) @iseq = iseq - - @label_name = "__disasm_label" - @label_field = VarField(Ident(label_name)) - @label_ref = VarRef(Ident(label_name)) + @disasm_label = DisasmLabel.new("__disasm_label") end def to_ruby @@ -37,143 +55,198 @@ def disassemble(iseq) clause = [] iseq.insns.each do |insn| - if insn.is_a?(Symbol) && insn.start_with?("label_") - clause << Assign(label_field, node_for(insn)) unless clause.last.is_a?(Next) - clauses[label] = clause - clause = [] - label = insn - next - end + case insn + when Symbol + if insn.start_with?("label_") + unless clause.last.is_a?(Next) + clause << Assign(disasm_label.field, node_for(insn)) + end + + clauses[label] = clause + clause = [] + label = insn + end + when BranchUnless + body = [ + Assign(disasm_label.field, node_for(insn.label)), + Next(Args([])) + ] - case insn[0] - when :branchunless - clause << IfNode(clause.pop, Statements([Assign(label_field, node_for(insn[1])),
Next(Args([]))]), nil) - when :dup + clause << IfNode(clause.pop, Statements(body), nil) + when Dup clause << clause.last - when :duphash - assocs = insn[1].map { |key, value| Assoc(node_for(key), node_for(value)) } + when DupHash + assocs = + insn.object.map do |key, value| + Assoc(node_for(key), node_for(value)) + end + clause << HashLiteral(LBrace("{"), assocs) - when :getglobal - clause << VarRef(GVar(insn[1].to_s)) - when :getlocal_WC_0 - clause << VarRef(Ident(local_name(insn[1], 0))) - when :jump - clause << Assign(label_field, node_for(insn[1])) - clause << Next(Args([])) - when :leave - value = Args([clause.pop]) - clause << (iseq.type == :top ? Break(value) : ReturnNode(value)) - when :opt_and - left, right = clause.pop(2) - clause << Binary(left, :&, right) - when :opt_aref - collection, arg = clause.pop(2) - clause << ARef(collection, Args([arg])) - when :opt_aset - collection, arg, value = clause.pop(3) - - if value.is_a?(Binary) && value.left.is_a?(ARef) && collection === value.left.collection && arg === value.left.index.parts[0] - clause << OpAssign(ARefField(collection, Args([arg])), Op("#{value.operator}="), value.right) - else - clause << Assign(ARefField(collection, Args([arg])), value) - end - when :opt_div - left, right = clause.pop(2) - clause << Binary(left, :/, right) - when :opt_eq - left, right = clause.pop(2) - clause << Binary(left, :==, right) - when :opt_ge - left, right = clause.pop(2) - clause << Binary(left, :>=, right) - when :opt_gt - left, right = clause.pop(2) - clause << Binary(left, :>, right) - when :opt_le - left, right = clause.pop(2) - clause << Binary(left, :<=, right) - when :opt_lt - left, right = clause.pop(2) - clause << Binary(left, :<, right) - when :opt_ltlt - left, right = clause.pop(2) - clause << Binary(left, :<<, right) - when :opt_minus - left, right = clause.pop(2) - clause << Binary(left, :-, right) - when :opt_mod - left, right = clause.pop(2) - clause << Binary(left, :%, right) - when :opt_mult - left, 
right = clause.pop(2) - clause << Binary(left, :*, right) - when :opt_neq - left, right = clause.pop(2) - clause << Binary(left, :"!=", right) - when :opt_or - left, right = clause.pop(2) - clause << Binary(left, :|, right) - when :opt_plus - left, right = clause.pop(2) - clause << Binary(left, :+, right) - when :opt_send_without_block - if insn[1][:flag] & VM_CALL_FCALL > 0 - if insn[1][:orig_argc] == 0 - clause.pop - clause << CallNode(nil, nil, Ident(insn[1][:mid]), Args([])) - elsif insn[1][:orig_argc] == 1 && insn[1][:mid].end_with?("=") - _receiver, argument = clause.pop(2) - clause << Assign(CallNode(nil, nil, Ident(insn[1][:mid][0..-2]), nil), argument) + when GetGlobal + clause << VarRef(GVar(insn.name.to_s)) + when GetLocalWC0 + local = iseq.local_table.locals[insn.index] + clause << VarRef(Ident(local.name.to_s)) + when Array + case insn[0] + when :jump + clause << Assign(disasm_label.field, node_for(insn[1])) + clause << Next(Args([])) + when :leave + value = Args([clause.pop]) + clause << (iseq.type == :top ? 
Break(value) : ReturnNode(value)) + when :opt_and + left, right = clause.pop(2) + clause << Binary(left, :&, right) + when :opt_aref + collection, arg = clause.pop(2) + clause << ARef(collection, Args([arg])) + when :opt_aset + collection, arg, value = clause.pop(3) + + clause << if value.is_a?(Binary) && value.left.is_a?(ARef) && + collection === value.left.collection && + arg === value.left.index.parts[0] + OpAssign( + ARefField(collection, Args([arg])), + Op("#{value.operator}="), + value.right + ) else - _receiver, *arguments = clause.pop(insn[1][:orig_argc] + 1) - clause << CallNode(nil, nil, Ident(insn[1][:mid]), ArgParen(Args(arguments))) + Assign(ARefField(collection, Args([arg])), value) end - else - if insn[1][:orig_argc] == 0 - clause << CallNode(clause.pop, Period("."), Ident(insn[1][:mid]), nil) - elsif insn[1][:orig_argc] == 1 && insn[1][:mid].end_with?("=") - receiver, argument = clause.pop(2) - clause << Assign(CallNode(receiver, Period("."), Ident(insn[1][:mid][0..-2]), nil), argument) + when :opt_div + left, right = clause.pop(2) + clause << Binary(left, :/, right) + when :opt_eq + left, right = clause.pop(2) + clause << Binary(left, :==, right) + when :opt_ge + left, right = clause.pop(2) + clause << Binary(left, :>=, right) + when :opt_gt + left, right = clause.pop(2) + clause << Binary(left, :>, right) + when :opt_le + left, right = clause.pop(2) + clause << Binary(left, :<=, right) + when :opt_lt + left, right = clause.pop(2) + clause << Binary(left, :<, right) + when :opt_ltlt + left, right = clause.pop(2) + clause << Binary(left, :<<, right) + when :opt_minus + left, right = clause.pop(2) + clause << Binary(left, :-, right) + when :opt_mod + left, right = clause.pop(2) + clause << Binary(left, :%, right) + when :opt_mult + left, right = clause.pop(2) + clause << Binary(left, :*, right) + when :opt_neq + left, right = clause.pop(2) + clause << Binary(left, :"!=", right) + when :opt_or + left, right = clause.pop(2) + clause << Binary(left, :|, 
right) + when :opt_plus + left, right = clause.pop(2) + clause << Binary(left, :+, right) + when :opt_send_without_block + if insn[1][:flag] & VM_CALL_FCALL > 0 + if insn[1][:orig_argc] == 0 + clause.pop + clause << CallNode(nil, nil, Ident(insn[1][:mid]), Args([])) + elsif insn[1][:orig_argc] == 1 && insn[1][:mid].end_with?("=") + _receiver, argument = clause.pop(2) + clause << Assign( + CallNode(nil, nil, Ident(insn[1][:mid][0..-2]), nil), + argument + ) + else + _receiver, *arguments = clause.pop(insn[1][:orig_argc] + 1) + clause << CallNode( + nil, + nil, + Ident(insn[1][:mid]), + ArgParen(Args(arguments)) + ) + end else - receiver, *arguments = clause.pop(insn[1][:orig_argc] + 1) - clause << CallNode(receiver, Period("."), Ident(insn[1][:mid]), ArgParen(Args(arguments))) + if insn[1][:orig_argc] == 0 + clause << CallNode( + clause.pop, + Period("."), + Ident(insn[1][:mid]), + nil + ) + elsif insn[1][:orig_argc] == 1 && insn[1][:mid].end_with?("=") + receiver, argument = clause.pop(2) + clause << Assign( + CallNode( + receiver, + Period("."), + Ident(insn[1][:mid][0..-2]), + nil + ), + argument + ) + else + receiver, *arguments = clause.pop(insn[1][:orig_argc] + 1) + clause << CallNode( + receiver, + Period("."), + Ident(insn[1][:mid]), + ArgParen(Args(arguments)) + ) + end end - end - when :putobject - case insn[1] - when Float - clause << FloatLiteral(insn[1].inspect) - when Integer - clause << Int(insn[1].inspect) - else - raise "Unknown object type: #{insn[1].class.name}" - end - when :putobject_INT2FIX_0_ - clause << Int("0") - when :putobject_INT2FIX_1_ - clause << Int("1") - when :putself - clause << VarRef(Kw("self")) - when :setglobal - target = GVar(insn[1].to_s) - value = clause.pop - - if value.is_a?(Binary) && VarRef(target) === value.left - clause << OpAssign(VarField(target), Op("#{value.operator}="), value.right) - else - clause << Assign(VarField(target), value) - end - when :setlocal_WC_0 - target = Ident(local_name(insn[1], 0)) - value = 
clause.pop + when :putobject + case insn[1] + when Float + clause << FloatLiteral(insn[1].inspect) + when Integer + clause << Int(insn[1].inspect) + else + raise "Unknown object type: #{insn[1].class.name}" + end + when :putobject_INT2FIX_0_ + clause << Int("0") + when :putobject_INT2FIX_1_ + clause << Int("1") + when :putself + clause << VarRef(Kw("self")) + when :setglobal + target = GVar(insn[1].to_s) + value = clause.pop - if value.is_a?(Binary) && VarRef(target) === value.left - clause << OpAssign(VarField(target), Op("#{value.operator}="), value.right) + clause << if value.is_a?(Binary) && VarRef(target) === value.left + OpAssign( + VarField(target), + Op("#{value.operator}="), + value.right + ) + else + Assign(VarField(target), value) + end + when :setlocal_WC_0 + target = Ident(local_name(insn[1], 0)) + value = clause.pop + + clause << if value.is_a?(Binary) && VarRef(target) === value.left + OpAssign( + VarField(target), + Op("#{value.operator}="), + value.right + ) + else + Assign(VarField(target), value) + end else - clause << Assign(VarField(target), value) + raise "Unknown instruction #{insn[0]}" end - else - raise "Unknown instruction #{insn[0]}" end end @@ -185,31 +258,44 @@ def disassemble(iseq) # Here we're going to build up a big case statement that will handle all # of the different labels. current = nil - clauses.reverse_each do |label, clause| - current = When(Args([node_for(label)]), Statements(clause), current) + clauses.reverse_each do |current_label, current_clause| + current = + When( + Args([node_for(current_label)]), + Statements(current_clause), + current + ) end - switch = Case(Kw("case"), label_ref, current) + switch = Case(Kw("case"), disasm_label.ref, current) # Here we're going to make sure that any locals that were established in # the label_0 block are initialized so that scoping rules work # correctly. 
stack = [] - locals = [label_name] + locals = [disasm_label.name] clauses[:label_0].each do |node| - if node.is_a?(Assign) && node.target.is_a?(VarField) && node.target.value.is_a?(Ident) + if node.is_a?(Assign) && node.target.is_a?(VarField) && + node.target.value.is_a?(Ident) value = node.target.value.value next if locals.include?(value) stack << Assign(node.target, VarRef(Kw("nil"))) - locals << value + locals << value end end # Finally, we'll set up the initial label and loop the entire case # statement. - stack << Assign(label_field, node_for(:label_0)) - stack << MethodAddBlock(CallNode(nil, nil, Ident("loop"), Args([])), BlockNode(Kw("do"), nil, BodyStmt(Statements([switch]), nil, nil, nil, nil))) + stack << Assign(disasm_label.field, node_for(:label_0)) + stack << MethodAddBlock( + CallNode(nil, nil, Ident("loop"), Args([])), + BlockNode( + Kw("do"), + nil, + BodyStmt(Statements([switch]), nil, nil, nil, nil) + ) + ) Statements(stack) end diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb new file mode 100644 index 00000000..c50c5c84 --- /dev/null +++ b/lib/syntax_tree/yarv/instructions.rb @@ -0,0 +1,1071 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # ### Summary + # + # `adjuststack` accepts a single integer argument and removes that many + # elements from the top of the stack. + # + # ### Usage + # + # ~~~ruby + # x = [true] + # x[0] ||= nil + # x[0] + # ~~~ + # + class AdjustStack + attr_reader :number + + def initialize(number) + @number = number + end + + def to_a(_iseq) + [:adjuststack, number] + end + + def length + 2 + end + + def pops + number + end + + def pushes + 0 + end + end + + # ### Summary + # + # `anytostring` ensures that the value on top of the stack is a string. + # + # It pops two values off the stack. If the first value is a string it + # pushes it back on the stack. 
If the first value is not a string, it uses + # Ruby's built in string coercion to coerce the second value to a string + # and then pushes that back on the stack. + # + # This is used in conjunction with `objtostring` as a fallback for when an + # object's `to_s` method does not return a string. + # + # ### Usage + # + # ~~~ruby + # "#{5}" + # ~~~ + # + class AnyToString + def to_a(_iseq) + [:anytostring] + end + + def length + 1 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `branchif` has one argument: the jump index. It pops one value off the + # stack: the jump condition. + # + # If the value popped off the stack is true, `branchif` jumps to + # the jump index and continues executing there. + # + # ### Usage + # + # ~~~ruby + # x = true + # x ||= "foo" + # puts x + # ~~~ + # + class BranchIf + attr_reader :label + + def initialize(label) + @label = label + end + + def patch!(iseq) + @label = iseq.label + end + + def to_a(_iseq) + [:branchif, label] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `branchnil` has one argument: the jump index. It pops one value off the + # stack: the jump condition. + # + # If the value popped off the stack is nil, `branchnil` jumps to + # the jump index and continues executing there. + # + # ### Usage + # + # ~~~ruby + # x = nil + # if x&.to_s + # puts "hi" + # end + # ~~~ + # + class BranchNil + attr_reader :label + + def initialize(label) + @label = label + end + + def patch!(iseq) + @label = iseq.label + end + + def to_a(_iseq) + [:branchnil, label] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `branchunless` has one argument: the jump index. It pops one value off + # the stack: the jump condition. + # + # If the value popped off the stack is false or nil, `branchunless` jumps + # to the jump index and continues executing there. 
+ # + # ### Usage + # + # ~~~ruby + # if 2 + 3 + # puts "foo" + # end + # ~~~ + # + class BranchUnless + attr_reader :label + + def initialize(label) + @label = label + end + + def patch!(iseq) + @label = iseq.label + end + + def to_a(_iseq) + [:branchunless, label] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `checkkeyword` checks if a keyword was passed at the callsite that + # called into the method represented by the instruction sequence. It has + # two arguments: the index of the local variable that stores the keywords + # metadata and the index of the keyword within that metadata. It pushes + # a boolean onto the stack indicating whether or not the keyword was + # given. + # + # ### Usage + # + # ~~~ruby + # def evaluate(value: rand) + # value + # end + # + # evaluate(value: 3) + # ~~~ + # + class CheckKeyword + attr_reader :keyword_bits_index, :keyword_index + + def initialize(keyword_bits_index, keyword_index) + @keyword_bits_index = keyword_bits_index + @keyword_index = keyword_index + end + + def patch!(iseq) + @label = iseq.label + end + + def to_a(iseq) + [ + :checkkeyword, + iseq.local_table.offset(keyword_bits_index), + keyword_index + ] + end + + def length + 3 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `concatarray` concatenates the two Arrays on top of the stack. + # + # It coerces the two objects at the top of the stack into Arrays by + # calling `to_a` if necessary, and makes sure to `dup` the first Array if + # it was already an Array, to avoid mutating it when concatenating. + # + # ### Usage + # + # ~~~ruby + # [1, *2] + # ~~~ + # + class ConcatArray + def to_a(_iseq) + [:concatarray] + end + + def length + 1 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `concatstrings` pops a number of strings from the stack joins them + # together into a single string and pushes that string back on the stack. 
+ # + # This does no coercion and so is always used in conjunction with + # `objtostring` and `anytostring` to ensure the stack contents are always + # strings. + # + # ### Usage + # + # ~~~ruby + # "#{5}" + # ~~~ + # + class ConcatStrings + attr_reader :number + + def initialize(number) + @number = number + end + + def to_a(_iseq) + [:concatstrings, number] + end + + def length + 2 + end + + def pops + number + end + + def pushes + 1 + end + end + + # ### Summary + # + # `defined` checks if the top value of the stack is defined. If it is, it + # pushes its value onto the stack. Otherwise it pushes `nil`. + # + # ### Usage + # + # ~~~ruby + # defined?(x) + # ~~~ + # + class Defined + NIL = 1 + IVAR = 2 + LVAR = 3 + GVAR = 4 + CVAR = 5 + CONST = 6 + METHOD = 7 + YIELD = 8 + ZSUPER = 9 + SELF = 10 + TRUE = 11 + FALSE = 12 + ASGN = 13 + EXPR = 14 + REF = 15 + FUNC = 16 + CONST_FROM = 17 + + attr_reader :type, :name, :message + + def initialize(type, name, message) + @type = type + @name = name + @message = message + end + + def to_a(_iseq) + [:defined, type, name, message] + end + + def length + 4 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `defineclass` defines a class. First it pops the superclass off the + # stack, then it pops the object off the stack that the class should be + # defined under. It has three arguments: the name of the constant, the + # instruction sequence associated with the class, and various flags that + # indicate if it is a singleton class, a module, or a regular class. 
+ # + # ### Usage + # + # ~~~ruby + # class Foo + # end + # ~~~ + # + class DefineClass + TYPE_CLASS = 0 + TYPE_SINGLETON_CLASS = 1 + TYPE_MODULE = 2 + FLAG_SCOPED = 8 + FLAG_HAS_SUPERCLASS = 16 + + attr_reader :name, :class_iseq, :flags + + def initialize(name, class_iseq, flags) + @name = name + @class_iseq = class_iseq + @flags = flags + end + + def to_a(_iseq) + [:defineclass, name, class_iseq.to_a, flags] + end + + def length + 4 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `definemethod` defines a method on the class of the current value of + # `self`. It accepts two arguments. The first is the name of the method + # being defined. The second is the instruction sequence representing the + # body of the method. + # + # ### Usage + # + # ~~~ruby + # def value = "value" + # ~~~ + # + class DefineMethod + attr_reader :name, :method_iseq + + def initialize(name, method_iseq) + @name = name + @method_iseq = method_iseq + end + + def to_a(_iseq) + [:definemethod, name, method_iseq.to_a] + end + + def length + 3 + end + + def pops + 0 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `definesmethod` defines a method on the singleton class of the current + # value of `self`. It accepts two arguments. The first is the name of the + # method being defined. The second is the instruction sequence representing + # the body of the method. It pops the object off the stack that the method + # should be defined on. + # + # ### Usage + # + # ~~~ruby + # def self.value = "value" + # ~~~ + # + class DefineSMethod + attr_reader :name, :method_iseq + + def initialize(name, method_iseq) + @name = name + @method_iseq = method_iseq + end + + def to_a(_iseq) + [:definesmethod, name, method_iseq.to_a] + end + + def length + 3 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `dup` copies the top value of the stack and pushes it onto the stack. 
+ # + # ### Usage + # + # ~~~ruby + # $global = 5 + # ~~~ + # + class Dup + def to_a(_iseq) + [:dup] + end + + def length + 1 + end + + def pops + 1 + end + + def pushes + 2 + end + end + + # ### Summary + # + # `duparray` dups an Array literal and pushes it onto the stack. + # + # ### Usage + # + # ~~~ruby + # [true] + # ~~~ + # + class DupArray + attr_reader :object + + def initialize(object) + @object = object + end + + def to_a(_iseq) + [:duparray, object] + end + + def length + 2 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `duphash` dups a Hash literal and pushes it onto the stack. + # + # ### Usage + # + # ~~~ruby + # { a: 1 } + # ~~~ + # + class DupHash + attr_reader :object + + def initialize(object) + @object = object + end + + def to_a(_iseq) + [:duphash, object] + end + + def length + 2 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `dupn` duplicates the top `n` stack elements. + # + # ### Usage + # + # ~~~ruby + # Object::X ||= true + # ~~~ + # + class DupN + attr_reader :number + + def initialize(number) + @number = number + end + + def to_a(_iseq) + [:dupn, number] + end + + def length + 2 + end + + def pops + number + end + + def pushes + number * 2 + end + end + + # ### Summary + # + # `expandarray` looks at the top of the stack, and if the value is an array + # it replaces it on the stack with `number` elements of the array, or `nil` + # if the elements are missing. 
+ # + # ### Usage + # + # ~~~ruby + # x, = [true, false, nil] + # ~~~ + # + class ExpandArray + attr_reader :number, :flags + + def initialize(number, flags) + @number = number + @flags = flags + end + + def to_a(_iseq) + [:expandarray, number, flags] + end + + def length + 3 + end + + def pops + 1 + end + + def pushes + number + end + end + + # ### Summary + # + # `getblockparam` is a similar instruction to `getlocal` in that it looks + # for a local variable in the current instruction sequence's local table and + # walks recursively up the parent instruction sequences until it finds it. + # The local it retrieves, however, is a special block local that was passed + # to the current method. It pushes the value of the block local onto the + # stack. + # + # ### Usage + # + # ~~~ruby + # def foo(&block) + # block + # end + # ~~~ + # + class GetBlockParam + attr_reader :index, :level + + def initialize(index, level) + @index = index + @level = level + end + + def to_a(iseq) + current = iseq + level.times { current = current.parent_iseq } + [:getblockparam, current.local_table.offset(index), level] + end + + def length + 3 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `getblockparamproxy` is almost the same as `getblockparam` except that it + # pushes a proxy object onto the stack instead of the actual value of the + # block local. This is used when a method is being called on the block + # local. 
+ # + # ### Usage + # + # ~~~ruby + # def foo(&block) + # block.call + # end + # ~~~ + # + class GetBlockParamProxy + attr_reader :index, :level + + def initialize(index, level) + @index = index + @level = level + end + + def to_a(iseq) + current = iseq + level.times { current = current.parent_iseq } + [:getblockparamproxy, current.local_table.offset(index), level] + end + + def length + 3 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `getclassvariable` looks for a class variable in the current class and + # pushes its value onto the stack. It uses an inline cache to reduce the + # need to look up the class variable in the class hierarchy every time. + # + # ### Usage + # + # ~~~ruby + # @@class_variable + # ~~~ + # + class GetClassVariable + attr_reader :name, :cache + + def initialize(name, cache) + @name = name + @cache = cache + end + + def to_a(_iseq) + [:getclassvariable, name, cache] + end + + def length + 3 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `getclassvariable` looks for a class variable in the current class and + # pushes its value onto the stack. + # + # This version of the `getclassvariable` instruction is no longer used since + # in Ruby 3.0 it gained an inline cache. + # + # ### Usage + # + # ~~~ruby + # @@class_variable + # ~~~ + # + class GetClassVariableUncached + attr_reader :name + + def initialize(name) + @name = name + end + + def to_a(_iseq) + [:getclassvariable, name] + end + + def length + 2 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `getconstant` performs a constant lookup and pushes the value of the + # constant onto the stack. It pops both the class it should look in and + # whether or not it should look globally as well. + # + # This instruction is no longer used since in Ruby 3.2 it was replaced by + # the consolidated `opt_getconstant_path` instruction. 
+ # + # ### Usage + # + # ~~~ruby + # Constant + # ~~~ + # + class GetConstant + attr_reader :name + + def initialize(name) + @name = name + end + + def to_a(_iseq) + [:getconstant, name] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `getglobal` pushes the value of a global variable onto the stack. + # + # ### Usage + # + # ~~~ruby + # $$ + # ~~~ + # + class GetGlobal + attr_reader :name + + def initialize(name) + @name = name + end + + def to_a(_iseq) + [:getglobal, name] + end + + def length + 2 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `getinstancevariable` pushes the value of an instance variable onto the + # stack. It uses an inline cache to avoid having to look up the instance + # variable in the class hierarchy every time. + # + # This instruction has two forms, but both have the same structure. Before + # Ruby 3.2, the inline cache corresponded to both the get and set + # instructions and could be shared. Since Ruby 3.2, it uses object shapes + # instead so the caches are unique per instruction. + # + # ### Usage + # + # ~~~ruby + # @instance_variable + # ~~~ + # + class GetInstanceVariable + attr_reader :name, :cache + + def initialize(name, cache) + @name = name + @cache = cache + end + + def to_a(_iseq) + [:getinstancevariable, name, cache] + end + + def length + 3 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `getlocal_WC_0` is a specialized version of the `getlocal` instruction. It + # fetches the value of a local variable from the current frame determined by + # the index given as its only argument. 
+ # + # ### Usage + # + # ~~~ruby + # value = 5 + # value + # ~~~ + # + class GetLocalWC0 + attr_reader :index + + def initialize(index) + @index = index + end + + def to_a(iseq) + [:getlocal_WC_0, iseq.local_table.offset(index)] + end + + def length + 2 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `getlocal_WC_1` is a specialized version of the `getlocal` instruction. It + # fetches the value of a local variable from the parent frame determined by + # the index given as its only argument. + # + # ### Usage + # + # ~~~ruby + # value = 5 + # self.then { value } + # ~~~ + # + class GetLocalWC1 + attr_reader :index + + def initialize(index) + @index = index + end + + def to_a(iseq) + [:getlocal_WC_1, iseq.parent_iseq.local_table.offset(index)] + end + + def length + 2 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `getlocal` fetches the value of a local variable from a frame determined + # by the level and index arguments. The level is the number of frames back + # to look and the index is the index in the local table. It pushes the value + # it finds onto the stack. 
+ # + # ### Usage + # + # ~~~ruby + # value = 5 + # tap { tap { value } } + # ~~~ + # + class GetLocal + attr_reader :index, :level + + def initialize(index, level) + @index = index + @level = level + end + + def to_a(iseq) + current = iseq + level.times { current = current.parent_iseq } + [:getlocal, current.local_table.offset(index), level] + end + + def length + 3 + end + + def pops + 0 + end + + def pushes + 1 + end + end + end +end diff --git a/test/yarv_test.rb b/test/yarv_test.rb index da348224..55cdb657 100644 --- a/test/yarv_test.rb +++ b/test/yarv_test.rb @@ -26,7 +26,7 @@ class YARVTest < Minitest::Test "1 << 2" => "break 1 << 2\n", "1 >> 2" => "break 1.>>(2)\n", "1 ** 2" => "break 1.**(2)\n", - "a = 1; a" => "a = 1\nbreak a\n", + "a = 1; a" => "a = 1\nbreak a\n" }.freeze CASES.each do |source, expected| @@ -35,6 +35,15 @@ class YARVTest < Minitest::Test end end + def test_bf + hello_world = + "++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]" \ + ">>.>---.+++++++..+++.>>.<-.<.+++.------.--------.>>+.>++." 
+ + iseq = YARV::Bf.new(hello_world).compile + Formatter.format(hello_world, YARV::Disassembler.new(iseq).to_ruby) + end + private def assert_disassembles(expected, source) From 441bc01d9f68e07c3acd891c915f950652f70176 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 21 Nov 2022 22:00:09 -0500 Subject: [PATCH 12/21] opt_aref_with --- lib/syntax_tree/compiler.rb | 29 +++++++++++++++++++++++++++++ lib/syntax_tree/yarv.rb | 5 +++++ test/compiler_test.rb | 1 + 3 files changed, 35 insertions(+) diff --git a/lib/syntax_tree/compiler.rb b/lib/syntax_tree/compiler.rb index 8327a080..106c3ca3 100644 --- a/lib/syntax_tree/compiler.rb +++ b/lib/syntax_tree/compiler.rb @@ -158,6 +158,21 @@ def visit_tstring_content(node) node.value end + def visit_var_ref(node) + raise CompilationError unless node.value.is_a?(Kw) + + case node.value.value + when "nil" + nil + when "true" + true + when "false" + false + else + raise CompilationError + end + end + def visit_word(node) if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) node.parts.first.value @@ -258,6 +273,20 @@ def visit_alias(node) def visit_aref(node) visit(node.collection) + + if !frozen_string_literal && specialized_instruction && (node.index.parts.length == 1) + arg = node.index.parts.first + + if arg.is_a?(StringLiteral) && (arg.parts.length == 1) + string_part = arg.parts.first + + if string_part.is_a?(TStringContent) + iseq.opt_aref_with(string_part.value, :[], 1) + return + end + end + end + visit(node.index) iseq.send(:[], 1) end diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index a29714a5..57a21f2c 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -527,6 +527,11 @@ def once(postexe_iseq, inline_storage) push([:once, postexe_iseq, inline_storage]) end + def opt_aref_with(object, method_id, argc, flag = VM_CALL_ARGS_SIMPLE) + stack.change_by(-1 + 1) + push([:opt_aref_with, object, call_data(method_id, argc, flag)]) + end + def opt_getconstant_path(names) if 
RUBY_VERSION >= "3.2" stack.change_by(+1) diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 27bf993d..485e92fc 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -201,6 +201,7 @@ class CompilerTest < Minitest::Test "foo[bar] ||= 1", "foo[bar] <<= 1", "foo[bar] ^= 1", + "foo['true']", # Constants (single) "Foo", "Foo = 1", From cc24d7f4198beb08cb3c37e244535afee013554b Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 21 Nov 2022 22:04:03 -0500 Subject: [PATCH 13/21] opt_aset_with --- lib/syntax_tree/compiler.rb | 18 ++++++++++++++++++ lib/syntax_tree/yarv.rb | 5 +++++ test/compiler_test.rb | 1 + 3 files changed, 24 insertions(+) diff --git a/lib/syntax_tree/compiler.rb b/lib/syntax_tree/compiler.rb index 106c3ca3..91ec3d30 100644 --- a/lib/syntax_tree/compiler.rb +++ b/lib/syntax_tree/compiler.rb @@ -337,6 +337,24 @@ def visit_array(node) def visit_assign(node) case node.target when ARefField + if !frozen_string_literal && specialized_instruction && (node.target.index.parts.length == 1) + arg = node.target.index.parts.first + + if arg.is_a?(StringLiteral) && (arg.parts.length == 1) + string_part = arg.parts.first + + if string_part.is_a?(TStringContent) + visit(node.target.collection) + visit(node.value) + iseq.swap + iseq.topn(1) + iseq.opt_aset_with(string_part.value, :[]=, 2) + iseq.pop + return + end + end + end + iseq.putnil visit(node.target.collection) visit(node.target.index) diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index 57a21f2c..0c4c3fc9 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -532,6 +532,11 @@ def opt_aref_with(object, method_id, argc, flag = VM_CALL_ARGS_SIMPLE) push([:opt_aref_with, object, call_data(method_id, argc, flag)]) end + def opt_aset_with(object, method_id, argc, flag = VM_CALL_ARGS_SIMPLE) + stack.change_by(-2 + 1) + push([:opt_aset_with, object, call_data(method_id, argc, flag)]) + end + def opt_getconstant_path(names) if RUBY_VERSION >= "3.2" 
stack.change_by(+1) diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 485e92fc..98559664 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -202,6 +202,7 @@ class CompilerTest < Minitest::Test "foo[bar] <<= 1", "foo[bar] ^= 1", "foo['true']", + "foo['true'] = 1", # Constants (single) "Foo", "Foo = 1", From 5bd3463db4f0c4b24fb7068c73be802c7b49e9fe Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 21 Nov 2022 22:08:38 -0500 Subject: [PATCH 14/21] setblockparam --- lib/syntax_tree/compiler.rb | 9 +++++++-- lib/syntax_tree/yarv.rb | 9 +++++++-- test/compiler_test.rb | 1 + 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/lib/syntax_tree/compiler.rb b/lib/syntax_tree/compiler.rb index 91ec3d30..8e1a0eaf 100644 --- a/lib/syntax_tree/compiler.rb +++ b/lib/syntax_tree/compiler.rb @@ -415,8 +415,13 @@ def visit_assign(node) when GVar iseq.setglobal(node.target.value.value.to_sym) when Ident - local_variable = visit(node.target) - iseq.setlocal(local_variable.index, local_variable.level) + lookup = visit(node.target) + + if lookup.local.is_a?(YARV::LocalTable::BlockLocal) + iseq.setblockparam(lookup.index, lookup.level) + else + iseq.setlocal(lookup.index, lookup.level) + end when IVar iseq.setinstancevariable(node.target.value.value.to_sym) end diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index 0c4c3fc9..a204989e 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -252,13 +252,13 @@ def to_a insn when Array case insn[0] - when :setlocal_WC_0, :setlocal_WC_1, :setlocal + when :setlocal_WC_0, :setlocal_WC_1, :setlocal, :setblockparam iseq = self case insn[0] when :setlocal_WC_1 iseq = iseq.parent_iseq - when :setlocal + when :setlocal, :setblockparam insn[2].times { iseq = iseq.parent_iseq } end @@ -704,6 +704,11 @@ def send(method_id, argc, flag = VM_CALL_ARGS_SIMPLE, block_iseq = nil) end end + def setblockparam(index, level) + stack.change_by(-1) + push([:setblockparam, index, level]) + end 
+ def setclassvariable(name) stack.change_by(-1) diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 98559664..56e38577 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -361,6 +361,7 @@ class CompilerTest < Minitest::Test "def foo(bar, *baz, &qux); end", "def foo(&qux); qux; end", "def foo(&qux); qux.call; end", + "def foo(&qux); qux = bar; end", "def foo(bar:); end", "def foo(bar:, baz:); end", "def foo(bar: 1); end", From f35c452221590d1f3dcea49e99d2992d674952e6 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 21 Nov 2022 22:21:32 -0500 Subject: [PATCH 15/21] setspecial --- lib/syntax_tree/compiler.rb | 52 +++++++++++++++++++++++++++---------- lib/syntax_tree/yarv.rb | 10 +++++++ test/compiler_test.rb | 1 + 3 files changed, 49 insertions(+), 14 deletions(-) diff --git a/lib/syntax_tree/compiler.rb b/lib/syntax_tree/compiler.rb index 8e1a0eaf..3a4af3da 100644 --- a/lib/syntax_tree/compiler.rb +++ b/lib/syntax_tree/compiler.rb @@ -438,7 +438,7 @@ def visit_assoc_splat(node) end def visit_backref(node) - iseq.getspecial(1, 2 * node.value[1..].to_i) + iseq.getspecial(YARV::VM_SVAR_BACKREF, 2 * node.value[1..].to_i) end def visit_bare_assoc_hash(node) @@ -888,25 +888,49 @@ def visit_heredoc(node) end def visit_if(node) - visit(node.predicate) - branchunless = iseq.branchunless(-1) - visit(node.statements) + if node.predicate.is_a?(RangeNode) + iseq.getspecial(YARV::VM_SVAR_FLIPFLOP_START, 0) + branchif = iseq.branchif(-1) - if last_statement? - iseq.leave - branchunless.patch!(iseq) + visit(node.predicate.left) + branchunless_true = iseq.branchunless(-1) - node.consequent ? 
visit(node.consequent) : iseq.putnil + iseq.putobject(true) + iseq.setspecial(YARV::VM_SVAR_FLIPFLOP_START) + branchif.patch!(iseq) + + visit(node.predicate.right) + branchunless_false = iseq.branchunless(-1) + + iseq.putobject(false) + iseq.setspecial(YARV::VM_SVAR_FLIPFLOP_START) + branchunless_false.patch!(iseq) + + visit(node.statements) + iseq.leave + branchunless_true.patch!(iseq) + iseq.putnil else - iseq.pop + visit(node.predicate) + branchunless = iseq.branchunless(-1) + visit(node.statements) - if node.consequent - jump = iseq.jump(-1) + if last_statement? + iseq.leave branchunless.patch!(iseq) - visit(node.consequent) - jump[1] = iseq.label + + node.consequent ? visit(node.consequent) : iseq.putnil else - branchunless.patch!(iseq) + iseq.pop + + if node.consequent + jump = iseq.jump(-1) + branchunless.patch!(iseq) + visit(node.consequent) + jump[1] = iseq.label + else + branchunless.patch!(iseq) + end end end end diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index a204989e..6056fded 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -765,6 +765,11 @@ def setn(number) push([:setn, number]) end + def setspecial(key) + stack.change_by(-1) + push([:setspecial, key]) + end + def splatarray(flag) stack.change_by(-1 + 1) push([:splatarray, flag]) @@ -817,5 +822,10 @@ def call_data(method_id, argc, flag = VM_CALL_ARGS_SIMPLE) VM_CALL_ZSUPER = 1 << 10 VM_CALL_OPT_SEND = 1 << 11 VM_CALL_KW_SPLAT_MUT = 1 << 12 + + # These constants correspond to the setspecial instruction. + VM_SVAR_LASTLINE = 0 # $_ + VM_SVAR_BACKREF = 1 # $~ + VM_SVAR_FLIPFLOP_START = 2 # flipflop end end diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 56e38577..c1dab39c 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -285,6 +285,7 @@ class CompilerTest < Minitest::Test "foo ? bar : baz", "case foo when bar then 1 end", "case foo when bar then 1 else 2 end", + "baz if (foo == 1) .. 
(bar == 1)", # Constructed values "foo..bar", "foo...bar", From 1262b52c781d35df4c911d87ed47be2322812b0d Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 21 Nov 2022 22:33:43 -0500 Subject: [PATCH 16/21] newarraykwsplat --- lib/syntax_tree/compiler.rb | 9 +++++++++ lib/syntax_tree/yarv.rb | 5 +++++ test/compiler_test.rb | 1 + 3 files changed, 15 insertions(+) diff --git a/lib/syntax_tree/compiler.rb b/lib/syntax_tree/compiler.rb index 3a4af3da..1b2c5987 100644 --- a/lib/syntax_tree/compiler.rb +++ b/lib/syntax_tree/compiler.rb @@ -311,6 +311,15 @@ def visit_args(node) def visit_array(node) if (compiled = RubyVisitor.compile(node)) iseq.duparray(compiled) + elsif node.contents && node.contents.parts.length == 1 && + node.contents.parts.first.is_a?(BareAssocHash) && + node.contents.parts.first.assocs.length == 1 && + node.contents.parts.first.assocs.first.is_a?(AssocSplat) + iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) + iseq.newhash(0) + visit(node.contents.parts.first) + iseq.send(:"core#hash_merge_kwd", 2) + iseq.newarraykwsplat(1) else length = 0 diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index 6056fded..b168a135 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -502,6 +502,11 @@ def newarray(length) push([:newarray, length]) end + def newarraykwsplat(length) + stack.change_by(-length + 1) + push([:newarraykwsplat, length]) + end + def newhash(length) stack.change_by(-length + 1) push([:newhash, length]) diff --git a/test/compiler_test.rb b/test/compiler_test.rb index c1dab39c..d44eef50 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -311,6 +311,7 @@ class CompilerTest < Minitest::Test "[1, 2, 3].min", "[foo, bar, baz].min", "[foo, bar, baz].min(1)", + "[**{ x: true }][0][:x]", # Core method calls "alias foo bar", "alias :foo :bar", From d4d7f0b4a65e94dc98b434ceec2c805fe62e8f1c Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 22 Nov 2022 10:38:43 -0500 Subject: [PATCH 17/21] Pattern 
match for arrays --- lib/syntax_tree/compiler.rb | 122 +++++++++++++++++++++++++++ lib/syntax_tree/yarv.rb | 18 ++++ lib/syntax_tree/yarv/instructions.rb | 4 +- test/compiler_test.rb | 8 +- 4 files changed, 149 insertions(+), 3 deletions(-) diff --git a/lib/syntax_tree/compiler.rb b/lib/syntax_tree/compiler.rb index 1b2c5987..ac49f7e0 100644 --- a/lib/syntax_tree/compiler.rb +++ b/lib/syntax_tree/compiler.rb @@ -343,6 +343,101 @@ def visit_array(node) end end + def visit_aryptn(node) + match_failures = [] + jumps_to_exit = [] + + # If there's a constant, then check if we match against that constant or + # not first. Branch to failure if we don't. + if node.constant + iseq.dup + visit(node.constant) + iseq.checkmatch(YARV::VM_CHECKMATCH_TYPE_CASE) + match_failures << iseq.branchunless(-1) + end + + # First, check if the #deconstruct cache is nil. If it is, we're going to + # call #deconstruct on the object and cache the result. + iseq.topn(2) + branchnil = iseq.branchnil(-1) + + # Next, ensure that the cached value was cached correctly, otherwise fail + # the match. + iseq.topn(2) + match_failures << iseq.branchunless(-1) + + # Since we have a valid cached value, we can skip past the part where we + # call #deconstruct on the object. + iseq.pop + iseq.topn(1) + jump = iseq.jump(-1) + + # Check if the object responds to #deconstruct, fail the match otherwise. + branchnil.patch!(iseq) + iseq.dup + iseq.putobject(:deconstruct) + iseq.send(:respond_to?, 1) + iseq.setn(3) + match_failures << iseq.branchunless(-1) + + # Call #deconstruct and ensure that it's an array, raise an error + # otherwise. + iseq.send(:deconstruct, 0) + iseq.setn(2) + iseq.dup + iseq.checktype(YARV::VM_CHECKTYPE_ARRAY) + match_error = iseq.branchunless(-1) + + # Ensure that the deconstructed array has the correct size, fail the match + # otherwise. 
+ jump[1] = iseq.label + iseq.dup + iseq.send(:length, 0) + iseq.putobject(node.requireds.length) + iseq.send(:==, 1) + match_failures << iseq.branchunless(-1) + + # For each required element, check if the deconstructed array contains the + # element, otherwise jump out to the top-level match failure. + iseq.dup + node.requireds.each_with_index do |required, index| + iseq.putobject(index) + iseq.send(:[], 1) + + case required + when VarField + lookup = visit(required) + iseq.setlocal(lookup.index, lookup.level) + else + visit(required) + iseq.checkmatch(YARV::VM_CHECKMATCH_TYPE_CASE) + match_failures << iseq.branchunless(-1) + end + + if index < node.requireds.length - 1 + iseq.dup + else + iseq.pop + jumps_to_exit << iseq.jump(-1) + end + end + + # Set up the routine here to raise an error to indicate that the type of + # the deconstructed array was incorrect. + match_error.patch!(iseq) + iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) + iseq.putobject(TypeError) + iseq.putobject("deconstruct must return Array") + iseq.send(:"core#raise", 2) + iseq.pop + + # Patch all of the match failures to jump here so that we pop a final + # value before returning to the parent node. 
+ match_failures.each { |match_failure| match_failure.patch!(iseq) } + iseq.pop + jumps_to_exit + end + def visit_assign(node) case node.target when ARefField @@ -1298,6 +1393,33 @@ def visit_range(node) end end + def visit_rassign(node) + if node.operator.is_a?(Kw) + iseq.putnil + visit(node.value) + iseq.dup + jumps = [] + + case node.pattern + when VarField + lookup = visit(node.pattern) + iseq.setlocal(lookup.index, lookup.level) + jumps << iseq.jump(-1) + else + jumps.concat(visit(node.pattern)) + end + + iseq.pop + iseq.pop + iseq.putobject(false) + iseq.leave + + jumps.each { |jump| jump[1] = iseq.label } + iseq.adjuststack(2) + iseq.putobject(true) + end + end + def visit_rational(node) iseq.putobject(node.accept(RubyVisitor.new)) end diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index b168a135..2ca29de7 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -370,6 +370,16 @@ def checkkeyword(keyword_bits_index, keyword_index) push(CheckKeyword.new(keyword_bits_index, keyword_index)) end + def checkmatch(flag) + stack.change_by(-2 + 1) + push([:checkmatch, flag]) + end + + def checktype(type) + stack.change_by(-1 + 2) + push([:checktype, type]) + end + def concatarray push(ConcatArray.new) end @@ -832,5 +842,13 @@ def call_data(method_id, argc, flag = VM_CALL_ARGS_SIMPLE) VM_SVAR_LASTLINE = 0 # $_ VM_SVAR_BACKREF = 1 # $~ VM_SVAR_FLIPFLOP_START = 2 # flipflop + + # These constants correspond to the checktype instruction. + VM_CHECKTYPE_ARRAY = 7 + + # These constants correspond to the checkmatch instruction. 
+ VM_CHECKMATCH_TYPE_WHEN = 1 + VM_CHECKMATCH_TYPE_CASE = 2 + VM_CHECKMATCH_TYPE_RESCUE = 3 end end diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index c50c5c84..ccb7a345 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -632,11 +632,11 @@ def length end def pops - number + 0 end def pushes - number * 2 + number end end diff --git a/test/compiler_test.rb b/test/compiler_test.rb index d44eef50..4f4fa9f3 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -416,7 +416,13 @@ class CompilerTest < Minitest::Test "-> {}", "-> (bar) do end", "-> (bar) {}", - "-> (bar; baz) { }" + "-> (bar; baz) { }", + # Pattern matching + "foo in bar", + "foo in [bar]", + "foo in [bar, baz]", + "foo in [1, 2, 3, bar, 4, 5, 6, baz]", + "foo in Foo[1, 2, 3, bar, 4, 5, 6, baz]", ] # These are the combinations of instructions that we're going to test. From 5abcb5a646fc3d4a9f22c2de085dc162e53b8ebd Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 22 Nov 2022 11:01:49 -0500 Subject: [PATCH 18/21] Handle => operator for rightward assignment --- lib/syntax_tree/compiler.rb | 77 ++++++++++++++++++++++++++++++++++++- test/compiler_test.rb | 1 + 2 files changed, 76 insertions(+), 2 deletions(-) diff --git a/lib/syntax_tree/compiler.rb b/lib/syntax_tree/compiler.rb index ac49f7e0..4050f4c9 100644 --- a/lib/syntax_tree/compiler.rb +++ b/lib/syntax_tree/compiler.rb @@ -1394,11 +1394,13 @@ def visit_range(node) end def visit_rassign(node) + iseq.putnil + if node.operator.is_a?(Kw) - iseq.putnil + jumps = [] + visit(node.value) iseq.dup - jumps = [] case node.pattern when VarField @@ -1417,6 +1419,77 @@ def visit_rassign(node) jumps.each { |jump| jump[1] = iseq.label } iseq.adjuststack(2) iseq.putobject(true) + else + jumps_to_match = [] + + iseq.putnil + iseq.putobject(false) + iseq.putnil + iseq.putnil + visit(node.value) + iseq.dup + + # Visit the pattern. 
If it matches, + case node.pattern + when VarField + lookup = visit(node.pattern) + iseq.setlocal(lookup.index, lookup.level) + jumps_to_match << iseq.jump(-1) + else + jumps_to_match.concat(visit(node.pattern)) + end + + # First we're going to push the core onto the stack, then we'll check if + # the value to match is truthy. If it is, we'll jump down to raise + # NoMatchingPatternKeyError. Otherwise we'll raise + # NoMatchingPatternError. + iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) + iseq.topn(4) + branchif_no_key = iseq.branchif(-1) + + # Here we're going to raise NoMatchingPatternError. + iseq.putobject(NoMatchingPatternError) + iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) + iseq.putobject("%p: %s") + iseq.topn(4) + iseq.topn(7) + iseq.send(:"core#sprintf", 3) + iseq.send(:"core#raise", 2) + jump_to_exit = iseq.jump(-1) + + # Here we're going to raise NoMatchingPatternKeyError. + branchif_no_key.patch!(iseq) + iseq.putobject(NoMatchingPatternKeyError) + iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) + iseq.putobject("%p: %s") + iseq.topn(4) + iseq.topn(7) + iseq.send(:"core#sprintf", 3) + iseq.topn(7) + iseq.topn(9) + + # Super special behavior here because of the weird kw_arg handling. + iseq.stack.change_by(-(1 + 1) + 1) + call_data = { mid: :new, flag: YARV::VM_CALL_KWARG, orig_argc: 1, kw_arg: [:matchee, :key] } + + if specialized_instruction + iseq.push([:opt_send_without_block, call_data]) + else + iseq.push([:send, call_data, nil]) + end + + iseq.send(:"core#raise", 1) + + # This runs when the pattern fails to match. + jump_to_exit[1] = iseq.label + iseq.adjuststack(7) + iseq.putnil + iseq.leave + + # This runs when the pattern matches successfully. 
+ jumps_to_match.each { |jump| jump[1] = iseq.label } + iseq.adjuststack(6) + iseq.putnil end end diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 4f4fa9f3..c2472432 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -423,6 +423,7 @@ class CompilerTest < Minitest::Test "foo in [bar, baz]", "foo in [1, 2, 3, bar, 4, 5, 6, baz]", "foo in Foo[1, 2, 3, bar, 4, 5, 6, baz]", + "foo => bar" ] # These are the combinations of instructions that we're going to test. From 8a0f1ecc1eae2943d50a3a86473ffc2c329e27be Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 22 Nov 2022 11:41:06 -0500 Subject: [PATCH 19/21] Create Legacy module for legacy YARV instructions --- lib/syntax_tree/compiler.rb | 24 +-- lib/syntax_tree/yarv.rb | 4 +- lib/syntax_tree/yarv/instructions.rb | 250 ++++++++++++++------------- 3 files changed, 141 insertions(+), 137 deletions(-) diff --git a/lib/syntax_tree/compiler.rb b/lib/syntax_tree/compiler.rb index 4050f4c9..c4eb5194 100644 --- a/lib/syntax_tree/compiler.rb +++ b/lib/syntax_tree/compiler.rb @@ -870,18 +870,18 @@ def visit_defined(node) case value when Const iseq.putnil - iseq.defined(YARV::Defined::CONST, name, "constant") + iseq.defined(YARV::Defined::TYPE_CONST, name, "constant") when CVar iseq.putnil - iseq.defined(YARV::Defined::CVAR, name, "class variable") + iseq.defined(YARV::Defined::TYPE_CVAR, name, "class variable") when GVar iseq.putnil - iseq.defined(YARV::Defined::GVAR, name, "global-variable") + iseq.defined(YARV::Defined::TYPE_GVAR, name, "global-variable") when Ident iseq.putobject("local-variable") when IVar iseq.putnil - iseq.defined(YARV::Defined::IVAR, name, "instance-variable") + iseq.defined(YARV::Defined::TYPE_IVAR, name, "instance-variable") when Kw case name when :false @@ -898,13 +898,13 @@ def visit_defined(node) iseq.putself name = node.value.value.value.to_sym - iseq.defined(YARV::Defined::FUNC, name, "method") + iseq.defined(YARV::Defined::TYPE_FUNC, name, "method") when YieldNode 
iseq.putnil - iseq.defined(YARV::Defined::YIELD, false, "yield") + iseq.defined(YARV::Defined::TYPE_YIELD, false, "yield") when ZSuper iseq.putnil - iseq.defined(YARV::Defined::ZSUPER, false, "super") + iseq.defined(YARV::Defined::TYPE_ZSUPER, false, "super") else iseq.putobject("expression") end @@ -1875,24 +1875,24 @@ def opassign_defined(node) name = node.target.constant.value.to_sym iseq.dup - iseq.defined(YARV::Defined::CONST_FROM, name, true) + iseq.defined(YARV::Defined::TYPE_CONST_FROM, name, true) when TopConstField name = node.target.constant.value.to_sym iseq.putobject(Object) iseq.dup - iseq.defined(YARV::Defined::CONST_FROM, name, true) + iseq.defined(YARV::Defined::TYPE_CONST_FROM, name, true) when VarField name = node.target.value.value.to_sym iseq.putnil case node.target.value when Const - iseq.defined(YARV::Defined::CONST, name, true) + iseq.defined(YARV::Defined::TYPE_CONST, name, true) when CVar - iseq.defined(YARV::Defined::CVAR, name, true) + iseq.defined(YARV::Defined::TYPE_CVAR, name, true) when GVar - iseq.defined(YARV::Defined::GVAR, name, true) + iseq.defined(YARV::Defined::TYPE_GVAR, name, true) end end diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index 2ca29de7..89920c6a 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -434,14 +434,14 @@ def getblockparamproxy(index, level) def getclassvariable(name) if RUBY_VERSION < "3.0" - push(GetClassVariableUncached.new(name)) + push(Legacy::GetClassVariable.new(name)) else push(GetClassVariable.new(name, inline_storage_for(name))) end end def getconstant(name) - push(GetConstant.new(name)) + push(Legacy::GetConstant.new(name)) end def getglobal(name) diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index ccb7a345..e6853a87 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -333,44 +333,36 @@ def pushes # ### Summary # - # `defined` checks if the top value of the stack is 
defined. If it is, it - # pushes its value onto the stack. Otherwise it pushes `nil`. + # `defineclass` defines a class. First it pops the superclass off the + # stack, then it pops the object off the stack that the class should be + # defined under. It has three arguments: the name of the constant, the + # instruction sequence associated with the class, and various flags that + # indicate if it is a singleton class, a module, or a regular class. # # ### Usage # # ~~~ruby - # defined?(x) + # class Foo + # end # ~~~ # - class Defined - NIL = 1 - IVAR = 2 - LVAR = 3 - GVAR = 4 - CVAR = 5 - CONST = 6 - METHOD = 7 - YIELD = 8 - ZSUPER = 9 - SELF = 10 - TRUE = 11 - FALSE = 12 - ASGN = 13 - EXPR = 14 - REF = 15 - FUNC = 16 - CONST_FROM = 17 + class DefineClass + TYPE_CLASS = 0 + TYPE_SINGLETON_CLASS = 1 + TYPE_MODULE = 2 + FLAG_SCOPED = 8 + FLAG_HAS_SUPERCLASS = 16 - attr_reader :type, :name, :message + attr_reader :name, :class_iseq, :flags - def initialize(type, name, message) - @type = type + def initialize(name, class_iseq, flags) @name = name - @message = message + @class_iseq = class_iseq + @flags = flags end def to_a(_iseq) - [:defined, type, name, message] + [:defineclass, name, class_iseq.to_a, flags] end def length @@ -378,7 +370,7 @@ def length end def pops - 1 + 2 end def pushes @@ -388,36 +380,44 @@ def pushes # ### Summary # - # `defineclass` defines a class. First it pops the superclass off the - # stack, then it pops the object off the stack that the class should be - # defined under. It has three arguments: the name of the constant, the - # instruction sequence associated with the class, and various flags that - # indicate if it is a singleton class, a module, or a regular class. + # `defined` checks if the top value of the stack is defined. If it is, it + # pushes its value onto the stack. Otherwise it pushes `nil`. 
# # ### Usage # # ~~~ruby - # class Foo - # end + # defined?(x) # ~~~ # - class DefineClass - TYPE_CLASS = 0 - TYPE_SINGLETON_CLASS = 1 - TYPE_MODULE = 2 - FLAG_SCOPED = 8 - FLAG_HAS_SUPERCLASS = 16 + class Defined + TYPE_NIL = 1 + TYPE_IVAR = 2 + TYPE_LVAR = 3 + TYPE_GVAR = 4 + TYPE_CVAR = 5 + TYPE_CONST = 6 + TYPE_METHOD = 7 + TYPE_YIELD = 8 + TYPE_ZSUPER = 9 + TYPE_SELF = 10 + TYPE_TRUE = 11 + TYPE_FALSE = 12 + TYPE_ASGN = 13 + TYPE_EXPR = 14 + TYPE_REF = 15 + TYPE_FUNC = 16 + TYPE_CONST_FROM = 17 - attr_reader :name, :class_iseq, :flags + attr_reader :type, :name, :message - def initialize(name, class_iseq, flags) + def initialize(type, name, message) + @type = type @name = name - @class_iseq = class_iseq - @flags = flags + @message = message end def to_a(_iseq) - [:defineclass, name, class_iseq.to_a, flags] + [:defined, type, name, message] end def length @@ -425,7 +425,7 @@ def length end def pops - 2 + 1 end def pushes @@ -800,83 +800,6 @@ def pushes end end - # ### Summary - # - # `getclassvariable` looks for a class variable in the current class and - # pushes its value onto the stack. - # - # This version of the `getclassvariable` instruction is no longer used since - # in Ruby 3.0 it gained an inline cache.` - # - # ### Usage - # - # ~~~ruby - # @@class_variable - # ~~~ - # - class GetClassVariableUncached - attr_reader :name - - def initialize(name) - @name = name - end - - def to_a(_iseq) - [:getclassvariable, name] - end - - def length - 2 - end - - def pops - 0 - end - - def pushes - 1 - end - end - - # ### Summary - # - # `getconstant` performs a constant lookup and pushes the value of the - # constant onto the stack. It pops both the class it should look in and - # whether or not it should look globally as well. - # - # This instruction is no longer used since in Ruby 3.2 it was replaced by - # the consolidated `opt_getconstant_path` instruction. 
- # - # ### Usage - # - # ~~~ruby - # Constant - # ~~~ - # - class GetConstant - attr_reader :name - - def initialize(name) - @name = name - end - - def to_a(_iseq) - [:getconstant, name] - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - end - # ### Summary # # `getglobal` pushes the value of a global variables onto the stack. @@ -1067,5 +990,86 @@ def pushes 1 end end + + # This module contains the instructions that used to be a part of YARV but + # have been replaced or removed in more recent versions. + module Legacy + # ### Summary + # + # `getclassvariable` looks for a class variable in the current class and + # pushes its value onto the stack. + # + # This version of the `getclassvariable` instruction is no longer used + # since in Ruby 3.0 it gained an inline cache.` + # + # ### Usage + # + # ~~~ruby + # @@class_variable + # ~~~ + # + class GetClassVariable + attr_reader :name + + def initialize(name) + @name = name + end + + def to_a(_iseq) + [:getclassvariable, name] + end + + def length + 2 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `getconstant` performs a constant lookup and pushes the value of the + # constant onto the stack. It pops both the class it should look in and + # whether or not it should look globally as well. + # + # This instruction is no longer used since in Ruby 3.2 it was replaced by + # the consolidated `opt_getconstant_path` instruction. 
+ # + # ### Usage + # + # ~~~ruby + # Constant + # ~~~ + # + class GetConstant + attr_reader :name + + def initialize(name) + @name = name + end + + def to_a(_iseq) + [:getconstant, name] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + end end end From ba8cad0d1485b5e039e669decc0d2f6dbb61fa07 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 22 Nov 2022 11:45:28 -0500 Subject: [PATCH 20/21] More instructions to classes --- .rubocop.yml | 3 + lib/syntax_tree.rb | 7 +- lib/syntax_tree/compiler.rb | 2131 -------------- lib/syntax_tree/yarv.rb | 851 +----- lib/syntax_tree/yarv/bf.rb | 30 +- lib/syntax_tree/yarv/compiler.rb | 2164 ++++++++++++++ lib/syntax_tree/yarv/disassembler.rb | 247 +- lib/syntax_tree/yarv/instruction_sequence.rb | 671 +++++ lib/syntax_tree/yarv/instructions.rb | 2688 +++++++++++++++++- lib/syntax_tree/yarv/legacy.rb | 169 ++ lib/syntax_tree/yarv/local_table.rb | 81 + test/compiler_test.rb | 5 +- test/yarv_test.rb | 4 +- 13 files changed, 5823 insertions(+), 3228 deletions(-) delete mode 100644 lib/syntax_tree/compiler.rb create mode 100644 lib/syntax_tree/yarv/compiler.rb create mode 100644 lib/syntax_tree/yarv/instruction_sequence.rb create mode 100644 lib/syntax_tree/yarv/legacy.rb create mode 100644 lib/syntax_tree/yarv/local_table.rb diff --git a/.rubocop.yml b/.rubocop.yml index 134a75dc..b7ba43e8 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -64,6 +64,9 @@ Style/CaseEquality: Style/CaseLikeIf: Enabled: false +Style/Documentation: + Enabled: false + Style/ExplicitBlockArgument: Enabled: false diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index 792ba20c..b2ff8414 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -10,6 +10,7 @@ require_relative "syntax_tree/formatter" require_relative "syntax_tree/node" +require_relative "syntax_tree/dsl" require_relative "syntax_tree/version" require_relative "syntax_tree/basic_visitor" @@ -26,12 +27,14 @@ require_relative "syntax_tree/pattern" 
require_relative "syntax_tree/search" -require_relative "syntax_tree/dsl" require_relative "syntax_tree/yarv" -require_relative "syntax_tree/compiler" require_relative "syntax_tree/yarv/bf" +require_relative "syntax_tree/yarv/compiler" require_relative "syntax_tree/yarv/disassembler" +require_relative "syntax_tree/yarv/instruction_sequence" require_relative "syntax_tree/yarv/instructions" +require_relative "syntax_tree/yarv/legacy" +require_relative "syntax_tree/yarv/local_table" # Syntax Tree is a suite of tools built on top of the internal CRuby parser. It # provides the ability to generate a syntax tree from source, as well as the diff --git a/lib/syntax_tree/compiler.rb b/lib/syntax_tree/compiler.rb deleted file mode 100644 index c4eb5194..00000000 --- a/lib/syntax_tree/compiler.rb +++ /dev/null @@ -1,2131 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - # This class is an experiment in transforming Syntax Tree nodes into their - # corresponding YARV instruction sequences. It attempts to mirror the - # behavior of RubyVM::InstructionSequence.compile. - # - # You use this as with any other visitor. First you parse code into a tree, - # then you visit it with this compiler. Visiting the root node of the tree - # will return a SyntaxTree::Visitor::Compiler::InstructionSequence object. - # With that object you can call #to_a on it, which will return a serialized - # form of the instruction sequence as an array. This array _should_ mirror - # the array given by RubyVM::InstructionSequence#to_a. 
- # - # As an example, here is how you would compile a single expression: - # - # program = SyntaxTree.parse("1 + 2") - # program.accept(SyntaxTree::Visitor::Compiler.new).to_a - # - # [ - # "YARVInstructionSequence/SimpleDataFormat", - # 3, - # 1, - # 1, - # {:arg_size=>0, :local_size=>0, :stack_max=>2}, - # "", - # "", - # "", - # 1, - # :top, - # [], - # {}, - # [], - # [ - # [:putobject_INT2FIX_1_], - # [:putobject, 2], - # [:opt_plus, {:mid=>:+, :flag=>16, :orig_argc=>1}], - # [:leave] - # ] - # ] - # - # Note that this is the same output as calling: - # - # RubyVM::InstructionSequence.compile("1 + 2").to_a - # - class Compiler < BasicVisitor - # This visitor is responsible for converting Syntax Tree nodes into their - # corresponding Ruby structures. This is used to convert the operands of - # some instructions like putobject that push a Ruby object directly onto - # the stack. It is only used when the entire structure can be represented - # at compile-time, as opposed to constructed at run-time. - class RubyVisitor < BasicVisitor - # This error is raised whenever a node cannot be converted into a Ruby - # object at compile-time. - class CompilationError < StandardError - end - - # This will attempt to compile the given node. If it's possible, then - # it will return the compiled object. Otherwise it will return nil. - def self.compile(node) - node.accept(new) - rescue CompilationError - end - - def visit_array(node) - visit_all(node.contents.parts) - end - - def visit_bare_assoc_hash(node) - node.assocs.to_h do |assoc| - # We can only convert regular key-value pairs. A double splat ** - # operator means it has to be converted at run-time. 
- raise CompilationError unless assoc.is_a?(Assoc) - [visit(assoc.key), visit(assoc.value)] - end - end - - def visit_float(node) - node.value.to_f - end - - alias visit_hash visit_bare_assoc_hash - - def visit_imaginary(node) - node.value.to_c - end - - def visit_int(node) - node.value.to_i - end - - def visit_label(node) - node.value.chomp(":").to_sym - end - - def visit_mrhs(node) - visit_all(node.parts) - end - - def visit_qsymbols(node) - node.elements.map { |element| visit(element).to_sym } - end - - def visit_qwords(node) - visit_all(node.elements) - end - - def visit_range(node) - left, right = [visit(node.left), visit(node.right)] - node.operator.value === ".." ? left..right : left...right - end - - def visit_rational(node) - node.value.to_r - end - - def visit_regexp_literal(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - Regexp.new(node.parts.first.value, visit_regexp_literal_flags(node)) - else - # Any interpolation of expressions or variables will result in the - # regular expression being constructed at run-time. - raise CompilationError - end - end - - # This isn't actually a visit method, though maybe it should be. It is - # responsible for converting the set of string options on a regular - # expression into its equivalent integer. 
- def visit_regexp_literal_flags(node) - node - .options - .chars - .inject(0) do |accum, option| - accum | - case option - when "i" - Regexp::IGNORECASE - when "x" - Regexp::EXTENDED - when "m" - Regexp::MULTILINE - else - raise "Unknown regexp option: #{option}" - end - end - end - - def visit_symbol_literal(node) - node.value.value.to_sym - end - - def visit_symbols(node) - node.elements.map { |element| visit(element).to_sym } - end - - def visit_tstring_content(node) - node.value - end - - def visit_var_ref(node) - raise CompilationError unless node.value.is_a?(Kw) - - case node.value.value - when "nil" - nil - when "true" - true - when "false" - false - else - raise CompilationError - end - end - - def visit_word(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - node.parts.first.value - else - # Any interpolation of expressions or variables will result in the - # string being constructed at run-time. - raise CompilationError - end - end - - def visit_words(node) - visit_all(node.elements) - end - - def visit_unsupported(_node) - raise CompilationError - end - - # Please forgive the metaprogramming here. This is used to create visit - # methods for every node that we did not explicitly handle. By default - # each of these methods will raise a CompilationError. - handled = instance_methods(false) - (Visitor.instance_methods(false) - handled).each do |method| - alias_method method, :visit_unsupported - end - end - - # These options mirror the compilation options that we currently support - # that can be also passed to RubyVM::InstructionSequence.compile. - attr_reader :frozen_string_literal, - :operands_unification, - :specialized_instruction - - # The current instruction sequence that is being compiled. - attr_reader :iseq - - # A boolean to track if we're currently compiling the last statement - # within a set of statements. This information is necessary to determine - # if we need to return the value of the last statement. 
- attr_reader :last_statement - - def initialize( - frozen_string_literal: false, - operands_unification: true, - specialized_instruction: true - ) - @frozen_string_literal = frozen_string_literal - @operands_unification = operands_unification - @specialized_instruction = specialized_instruction - - @iseq = nil - @last_statement = false - end - - def visit_BEGIN(node) - visit(node.statements) - end - - def visit_CHAR(node) - if frozen_string_literal - iseq.putobject(node.value[1..]) - else - iseq.putstring(node.value[1..]) - end - end - - def visit_END(node) - once_iseq = - with_child_iseq(iseq.block_child_iseq(node.location)) do - postexe_iseq = - with_child_iseq(iseq.block_child_iseq(node.location)) do - iseq.event(:RUBY_EVENT_B_CALL) - - *statements, last_statement = node.statements.body - visit_all(statements) - with_last_statement { visit(last_statement) } - - iseq.event(:RUBY_EVENT_B_RETURN) - iseq.leave - end - - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) - iseq.send(:"core#set_postexe", 0, YARV::VM_CALL_FCALL, postexe_iseq) - iseq.leave - end - - iseq.once(once_iseq, iseq.inline_storage) - iseq.pop - end - - def visit_alias(node) - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CBASE) - visit(node.left) - visit(node.right) - iseq.send(:"core#set_method_alias", 3) - end - - def visit_aref(node) - visit(node.collection) - - if !frozen_string_literal && specialized_instruction && (node.index.parts.length == 1) - arg = node.index.parts.first - - if arg.is_a?(StringLiteral) && (arg.parts.length == 1) - string_part = arg.parts.first - - if string_part.is_a?(TStringContent) - iseq.opt_aref_with(string_part.value, :[], 1) - return - end - end - end - - visit(node.index) - iseq.send(:[], 1) - end - - def visit_arg_block(node) - visit(node.value) - end - - def visit_arg_paren(node) - visit(node.arguments) - end - - def visit_arg_star(node) - visit(node.value) - iseq.splatarray(false) - end - - def 
visit_args(node) - visit_all(node.parts) - end - - def visit_array(node) - if (compiled = RubyVisitor.compile(node)) - iseq.duparray(compiled) - elsif node.contents && node.contents.parts.length == 1 && - node.contents.parts.first.is_a?(BareAssocHash) && - node.contents.parts.first.assocs.length == 1 && - node.contents.parts.first.assocs.first.is_a?(AssocSplat) - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) - iseq.newhash(0) - visit(node.contents.parts.first) - iseq.send(:"core#hash_merge_kwd", 2) - iseq.newarraykwsplat(1) - else - length = 0 - - node.contents.parts.each do |part| - if part.is_a?(ArgStar) - if length > 0 - iseq.newarray(length) - length = 0 - end - - visit(part.value) - iseq.concatarray - else - visit(part) - length += 1 - end - end - - iseq.newarray(length) if length > 0 - iseq.concatarray if length > 0 && length != node.contents.parts.length - end - end - - def visit_aryptn(node) - match_failures = [] - jumps_to_exit = [] - - # If there's a constant, then check if we match against that constant or - # not first. Branch to failure if we don't. - if node.constant - iseq.dup - visit(node.constant) - iseq.checkmatch(YARV::VM_CHECKMATCH_TYPE_CASE) - match_failures << iseq.branchunless(-1) - end - - # First, check if the #deconstruct cache is nil. If it is, we're going to - # call #deconstruct on the object and cache the result. - iseq.topn(2) - branchnil = iseq.branchnil(-1) - - # Next, ensure that the cached value was cached correctly, otherwise fail - # the match. - iseq.topn(2) - match_failures << iseq.branchunless(-1) - - # Since we have a valid cached value, we can skip past the part where we - # call #deconstruct on the object. - iseq.pop - iseq.topn(1) - jump = iseq.jump(-1) - - # Check if the object responds to #deconstruct, fail the match otherwise. 
- branchnil.patch!(iseq) - iseq.dup - iseq.putobject(:deconstruct) - iseq.send(:respond_to?, 1) - iseq.setn(3) - match_failures << iseq.branchunless(-1) - - # Call #deconstruct and ensure that it's an array, raise an error - # otherwise. - iseq.send(:deconstruct, 0) - iseq.setn(2) - iseq.dup - iseq.checktype(YARV::VM_CHECKTYPE_ARRAY) - match_error = iseq.branchunless(-1) - - # Ensure that the deconstructed array has the correct size, fail the match - # otherwise. - jump[1] = iseq.label - iseq.dup - iseq.send(:length, 0) - iseq.putobject(node.requireds.length) - iseq.send(:==, 1) - match_failures << iseq.branchunless(-1) - - # For each required element, check if the deconstructed array contains the - # element, otherwise jump out to the top-level match failure. - iseq.dup - node.requireds.each_with_index do |required, index| - iseq.putobject(index) - iseq.send(:[], 1) - - case required - when VarField - lookup = visit(required) - iseq.setlocal(lookup.index, lookup.level) - else - visit(required) - iseq.checkmatch(YARV::VM_CHECKMATCH_TYPE_CASE) - match_failures << iseq.branchunless(-1) - end - - if index < node.requireds.length - 1 - iseq.dup - else - iseq.pop - jumps_to_exit << iseq.jump(-1) - end - end - - # Set up the routine here to raise an error to indicate that the type of - # the deconstructed array was incorrect. - match_error.patch!(iseq) - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) - iseq.putobject(TypeError) - iseq.putobject("deconstruct must return Array") - iseq.send(:"core#raise", 2) - iseq.pop - - # Patch all of the match failures to jump here so that we pop a final - # value before returning to the parent node. 
- match_failures.each { |match_failure| match_failure.patch!(iseq) } - iseq.pop - jumps_to_exit - end - - def visit_assign(node) - case node.target - when ARefField - if !frozen_string_literal && specialized_instruction && (node.target.index.parts.length == 1) - arg = node.target.index.parts.first - - if arg.is_a?(StringLiteral) && (arg.parts.length == 1) - string_part = arg.parts.first - - if string_part.is_a?(TStringContent) - visit(node.target.collection) - visit(node.value) - iseq.swap - iseq.topn(1) - iseq.opt_aset_with(string_part.value, :[]=, 2) - iseq.pop - return - end - end - end - - iseq.putnil - visit(node.target.collection) - visit(node.target.index) - visit(node.value) - iseq.setn(3) - iseq.send(:[]=, 2) - iseq.pop - when ConstPathField - names = constant_names(node.target) - name = names.pop - - if RUBY_VERSION >= "3.2" - iseq.opt_getconstant_path(names) - visit(node.value) - iseq.swap - iseq.topn(1) - iseq.swap - iseq.setconstant(name) - else - visit(node.value) - iseq.dup if last_statement? - iseq.opt_getconstant_path(names) - iseq.setconstant(name) - end - when Field - iseq.putnil - visit(node.target) - visit(node.value) - iseq.setn(2) - iseq.send(:"#{node.target.name.value}=", 1) - iseq.pop - when TopConstField - name = node.target.constant.value.to_sym - - if RUBY_VERSION >= "3.2" - iseq.putobject(Object) - visit(node.value) - iseq.swap - iseq.topn(1) - iseq.swap - iseq.setconstant(name) - else - visit(node.value) - iseq.dup if last_statement? - iseq.putobject(Object) - iseq.setconstant(name) - end - when VarField - visit(node.value) - iseq.dup if last_statement? 
- - case node.target.value - when Const - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) - iseq.setconstant(node.target.value.value.to_sym) - when CVar - iseq.setclassvariable(node.target.value.value.to_sym) - when GVar - iseq.setglobal(node.target.value.value.to_sym) - when Ident - lookup = visit(node.target) - - if lookup.local.is_a?(YARV::LocalTable::BlockLocal) - iseq.setblockparam(lookup.index, lookup.level) - else - iseq.setlocal(lookup.index, lookup.level) - end - when IVar - iseq.setinstancevariable(node.target.value.value.to_sym) - end - end - end - - def visit_assoc(node) - visit(node.key) - visit(node.value) - end - - def visit_assoc_splat(node) - visit(node.value) - end - - def visit_backref(node) - iseq.getspecial(YARV::VM_SVAR_BACKREF, 2 * node.value[1..].to_i) - end - - def visit_bare_assoc_hash(node) - if (compiled = RubyVisitor.compile(node)) - iseq.duphash(compiled) - else - visit_all(node.assocs) - end - end - - def visit_binary(node) - case node.operator - when :"&&" - visit(node.left) - iseq.dup - - branchunless = iseq.branchunless(-1) - iseq.pop - - visit(node.right) - branchunless.patch!(iseq) - when :"||" - visit(node.left) - iseq.dup - - branchif = iseq.branchif(-1) - iseq.pop - - visit(node.right) - branchif.patch!(iseq) - else - visit(node.left) - visit(node.right) - iseq.send(node.operator, 1) - end - end - - def visit_block(node) - with_child_iseq(iseq.block_child_iseq(node.location)) do - iseq.event(:RUBY_EVENT_B_CALL) - visit(node.block_var) - visit(node.bodystmt) - iseq.event(:RUBY_EVENT_B_RETURN) - iseq.leave - end - end - - def visit_block_var(node) - params = node.params - - if params.requireds.length == 1 && params.optionals.empty? && - !params.rest && params.posts.empty? && params.keywords.empty? 
&& - !params.keyword_rest && !params.block - iseq.argument_options[:ambiguous_param0] = true - end - - visit(node.params) - - node.locals.each { |local| iseq.local_table.plain(local.value.to_sym) } - end - - def visit_blockarg(node) - iseq.argument_options[:block_start] = iseq.argument_size - iseq.local_table.block(node.name.value.to_sym) - iseq.argument_size += 1 - end - - def visit_bodystmt(node) - visit(node.statements) - end - - def visit_call(node) - if node.is_a?(CallNode) - return( - visit_call( - CommandCall.new( - receiver: node.receiver, - operator: node.operator, - message: node.message, - arguments: node.arguments, - block: nil, - location: node.location - ) - ) - ) - end - - arg_parts = argument_parts(node.arguments) - argc = arg_parts.length - - # First we're going to check if we're calling a method on an array - # literal without any arguments. In that case there are some - # specializations we might be able to perform. - if argc == 0 && (node.message.is_a?(Ident) || node.message.is_a?(Op)) - case node.receiver - when ArrayLiteral - parts = node.receiver.contents&.parts || [] - - if parts.none? { |part| part.is_a?(ArgStar) } && - RubyVisitor.compile(node.receiver).nil? - case node.message.value - when "max" - visit(node.receiver.contents) - iseq.opt_newarray_max(parts.length) - return - when "min" - visit(node.receiver.contents) - iseq.opt_newarray_min(parts.length) - return - end - end - when StringLiteral - if RubyVisitor.compile(node.receiver).nil? 
- case node.message.value - when "-@" - iseq.opt_str_uminus(node.receiver.parts.first.value) - return - when "freeze" - iseq.opt_str_freeze(node.receiver.parts.first.value) - return - end - end - end - end - - if node.receiver - if node.receiver.is_a?(VarRef) - lookup = iseq.local_variable(node.receiver.value.value.to_sym) - - if lookup.local.is_a?(YARV::LocalTable::BlockLocal) - iseq.getblockparamproxy(lookup.index, lookup.level) - else - visit(node.receiver) - end - else - visit(node.receiver) - end - else - iseq.putself - end - - branchnil = - if node.operator&.value == "&." - iseq.dup - iseq.branchnil(-1) - end - - flag = 0 - - arg_parts.each do |arg_part| - case arg_part - when ArgBlock - argc -= 1 - flag |= YARV::VM_CALL_ARGS_BLOCKARG - visit(arg_part) - when ArgStar - flag |= YARV::VM_CALL_ARGS_SPLAT - visit(arg_part) - when ArgsForward - flag |= YARV::VM_CALL_ARGS_SPLAT | YARV::VM_CALL_ARGS_BLOCKARG - - lookup = iseq.local_table.find(:*, 0) - iseq.getlocal(lookup.index, lookup.level) - iseq.splatarray(arg_parts.length != 1) - - lookup = iseq.local_table.find(:&, 0) - iseq.getblockparamproxy(lookup.index, lookup.level) - when BareAssocHash - flag |= YARV::VM_CALL_KW_SPLAT - visit(arg_part) - else - visit(arg_part) - end - end - - block_iseq = visit(node.block) if node.block - flag |= YARV::VM_CALL_ARGS_SIMPLE if block_iseq.nil? && flag == 0 - flag |= YARV::VM_CALL_FCALL if node.receiver.nil? 
- - iseq.send(node.message.value.to_sym, argc, flag, block_iseq) - branchnil.patch!(iseq) if branchnil - end - - def visit_case(node) - visit(node.value) if node.value - - clauses = [] - else_clause = nil - current = node.consequent - - while current - clauses << current - - if (current = current.consequent).is_a?(Else) - else_clause = current - break - end - end - - branches = - clauses.map do |clause| - visit(clause.arguments) - iseq.topn(1) - iseq.send(:===, 1, YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE) - [clause, iseq.branchif(:label_00)] - end - - iseq.pop - else_clause ? visit(else_clause) : iseq.putnil - iseq.leave - - branches.each_with_index do |(clause, branchif), index| - iseq.leave if index != 0 - branchif.patch!(iseq) - iseq.pop - visit(clause) - end - end - - def visit_class(node) - name = node.constant.constant.value.to_sym - class_iseq = - with_child_iseq(iseq.class_child_iseq(name, node.location)) do - iseq.event(:RUBY_EVENT_CLASS) - visit(node.bodystmt) - iseq.event(:RUBY_EVENT_END) - iseq.leave - end - - flags = YARV::DefineClass::TYPE_CLASS - - case node.constant - when ConstPathRef - flags |= YARV::DefineClass::FLAG_SCOPED - visit(node.constant.parent) - when ConstRef - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) - when TopConstRef - flags |= YARV::DefineClass::FLAG_SCOPED - iseq.putobject(Object) - end - - if node.superclass - flags |= YARV::DefineClass::FLAG_HAS_SUPERCLASS - visit(node.superclass) - else - iseq.putnil - end - - iseq.defineclass(name, class_iseq, flags) - end - - def visit_command(node) - visit_call( - CommandCall.new( - receiver: nil, - operator: nil, - message: node.message, - arguments: node.arguments, - block: node.block, - location: node.location - ) - ) - end - - def visit_command_call(node) - visit_call( - CommandCall.new( - receiver: node.receiver, - operator: node.operator, - message: node.message, - arguments: node.arguments, - block: node.block, - location: node.location - ) - ) - end - - def 
visit_const_path_field(node) - visit(node.parent) - end - - def visit_const_path_ref(node) - names = constant_names(node) - iseq.opt_getconstant_path(names) - end - - def visit_def(node) - name = node.name.value.to_sym - method_iseq = iseq.method_child_iseq(name.to_s, node.location) - - with_child_iseq(method_iseq) do - visit(node.params) if node.params - iseq.event(:RUBY_EVENT_CALL) - visit(node.bodystmt) - iseq.event(:RUBY_EVENT_RETURN) - iseq.leave - end - - if node.target - visit(node.target) - iseq.definesmethod(name, method_iseq) - else - iseq.definemethod(name, method_iseq) - end - - iseq.putobject(name) - end - - def visit_defined(node) - case node.value - when Assign - # If we're assigning to a local variable, then we need to make sure - # that we put it into the local table. - if node.value.target.is_a?(VarField) && - node.value.target.value.is_a?(Ident) - iseq.local_table.plain(node.value.target.value.value.to_sym) - end - - iseq.putobject("assignment") - when VarRef - value = node.value.value - name = value.value.to_sym - - case value - when Const - iseq.putnil - iseq.defined(YARV::Defined::TYPE_CONST, name, "constant") - when CVar - iseq.putnil - iseq.defined(YARV::Defined::TYPE_CVAR, name, "class variable") - when GVar - iseq.putnil - iseq.defined(YARV::Defined::TYPE_GVAR, name, "global-variable") - when Ident - iseq.putobject("local-variable") - when IVar - iseq.putnil - iseq.defined(YARV::Defined::TYPE_IVAR, name, "instance-variable") - when Kw - case name - when :false - iseq.putobject("false") - when :nil - iseq.putobject("nil") - when :self - iseq.putobject("self") - when :true - iseq.putobject("true") - end - end - when VCall - iseq.putself - - name = node.value.value.value.to_sym - iseq.defined(YARV::Defined::TYPE_FUNC, name, "method") - when YieldNode - iseq.putnil - iseq.defined(YARV::Defined::TYPE_YIELD, false, "yield") - when ZSuper - iseq.putnil - iseq.defined(YARV::Defined::TYPE_ZSUPER, false, "super") - else - 
iseq.putobject("expression") - end - end - - def visit_dyna_symbol(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - iseq.putobject(node.parts.first.value.to_sym) - end - end - - def visit_else(node) - visit(node.statements) - iseq.pop unless last_statement? - end - - def visit_elsif(node) - visit_if( - IfNode.new( - predicate: node.predicate, - statements: node.statements, - consequent: node.consequent, - location: node.location - ) - ) - end - - def visit_field(node) - visit(node.parent) - end - - def visit_float(node) - iseq.putobject(node.accept(RubyVisitor.new)) - end - - def visit_for(node) - visit(node.collection) - - name = node.index.value.value.to_sym - iseq.local_table.plain(name) - - block_iseq = - with_child_iseq(iseq.block_child_iseq(node.statements.location)) do - iseq.argument_options[:lead_num] ||= 0 - iseq.argument_options[:lead_num] += 1 - iseq.argument_options[:ambiguous_param0] = true - - iseq.argument_size += 1 - iseq.local_table.plain(2) - - iseq.getlocal(0, 0) - - local_variable = iseq.local_variable(name) - iseq.setlocal(local_variable.index, local_variable.level) - - iseq.event(:RUBY_EVENT_B_CALL) - iseq.nop - - visit(node.statements) - iseq.event(:RUBY_EVENT_B_RETURN) - iseq.leave - end - - iseq.send(:each, 0, 0, block_iseq) - end - - def visit_hash(node) - if (compiled = RubyVisitor.compile(node)) - iseq.duphash(compiled) - else - visit_all(node.assocs) - iseq.newhash(node.assocs.length * 2) - end - end - - def visit_heredoc(node) - if node.beginning.value.end_with?("`") - visit_xstring_literal(node) - elsif node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - visit(node.parts.first) - else - length = visit_string_parts(node) - iseq.concatstrings(length) - end - end - - def visit_if(node) - if node.predicate.is_a?(RangeNode) - iseq.getspecial(YARV::VM_SVAR_FLIPFLOP_START, 0) - branchif = iseq.branchif(-1) - - visit(node.predicate.left) - branchunless_true = iseq.branchunless(-1) - - 
iseq.putobject(true) - iseq.setspecial(YARV::VM_SVAR_FLIPFLOP_START) - branchif.patch!(iseq) - - visit(node.predicate.right) - branchunless_false = iseq.branchunless(-1) - - iseq.putobject(false) - iseq.setspecial(YARV::VM_SVAR_FLIPFLOP_START) - branchunless_false.patch!(iseq) - - visit(node.statements) - iseq.leave - branchunless_true.patch!(iseq) - iseq.putnil - else - visit(node.predicate) - branchunless = iseq.branchunless(-1) - visit(node.statements) - - if last_statement? - iseq.leave - branchunless.patch!(iseq) - - node.consequent ? visit(node.consequent) : iseq.putnil - else - iseq.pop - - if node.consequent - jump = iseq.jump(-1) - branchunless.patch!(iseq) - visit(node.consequent) - jump[1] = iseq.label - else - branchunless.patch!(iseq) - end - end - end - end - - def visit_if_op(node) - visit_if( - IfNode.new( - predicate: node.predicate, - statements: node.truthy, - consequent: - Else.new( - keyword: Kw.new(value: "else", location: Location.default), - statements: node.falsy, - location: Location.default - ), - location: Location.default - ) - ) - end - - def visit_imaginary(node) - iseq.putobject(node.accept(RubyVisitor.new)) - end - - def visit_int(node) - iseq.putobject(node.accept(RubyVisitor.new)) - end - - def visit_kwrest_param(node) - iseq.argument_options[:kwrest] = iseq.argument_size - iseq.argument_size += 1 - iseq.local_table.plain(node.name.value.to_sym) - end - - def visit_label(node) - iseq.putobject(node.accept(RubyVisitor.new)) - end - - def visit_lambda(node) - lambda_iseq = - with_child_iseq(iseq.block_child_iseq(node.location)) do - iseq.event(:RUBY_EVENT_B_CALL) - visit(node.params) - visit(node.statements) - iseq.event(:RUBY_EVENT_B_RETURN) - iseq.leave - end - - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) - iseq.send(:lambda, 0, YARV::VM_CALL_FCALL, lambda_iseq) - end - - def visit_lambda_var(node) - visit_block_var(node) - end - - def visit_massign(node) - visit(node.value) - iseq.dup - visit(node.target) - end - - def 
visit_method_add_block(node) - visit_call( - CommandCall.new( - receiver: node.call.receiver, - operator: node.call.operator, - message: node.call.message, - arguments: node.call.arguments, - block: node.block, - location: node.location - ) - ) - end - - def visit_mlhs(node) - lookups = [] - node.parts.each do |part| - case part - when VarField - lookups << visit(part) - end - end - - iseq.expandarray(lookups.length, 0) - lookups.each { |lookup| iseq.setlocal(lookup.index, lookup.level) } - end - - def visit_module(node) - name = node.constant.constant.value.to_sym - module_iseq = - with_child_iseq(iseq.module_child_iseq(name, node.location)) do - iseq.event(:RUBY_EVENT_CLASS) - visit(node.bodystmt) - iseq.event(:RUBY_EVENT_END) - iseq.leave - end - - flags = YARV::DefineClass::TYPE_MODULE - - case node.constant - when ConstPathRef - flags |= YARV::DefineClass::FLAG_SCOPED - visit(node.constant.parent) - when ConstRef - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) - when TopConstRef - flags |= YARV::DefineClass::FLAG_SCOPED - iseq.putobject(Object) - end - - iseq.putnil - iseq.defineclass(name, module_iseq, flags) - end - - def visit_mrhs(node) - if (compiled = RubyVisitor.compile(node)) - iseq.duparray(compiled) - else - visit_all(node.parts) - iseq.newarray(node.parts.length) - end - end - - def visit_not(node) - visit(node.statement) - iseq.send(:!, 0) - end - - def visit_opassign(node) - flag = YARV::VM_CALL_ARGS_SIMPLE - if node.target.is_a?(ConstPathField) || node.target.is_a?(TopConstField) - flag |= YARV::VM_CALL_FCALL - end - - case (operator = node.operator.value.chomp("=").to_sym) - when :"&&" - branchunless = nil - - with_opassign(node) do - iseq.dup - branchunless = iseq.branchunless(-1) - iseq.pop - visit(node.value) - end - - case node.target - when ARefField - iseq.leave - branchunless.patch!(iseq) - iseq.setn(3) - iseq.adjuststack(3) - when ConstPathField, TopConstField - branchunless.patch!(iseq) - iseq.swap - iseq.pop - else - 
branchunless.patch!(iseq) - end - when :"||" - if node.target.is_a?(ConstPathField) || node.target.is_a?(TopConstField) - opassign_defined(node) - iseq.swap - iseq.pop - elsif node.target.is_a?(VarField) && - [Const, CVar, GVar].include?(node.target.value.class) - opassign_defined(node) - else - branchif = nil - - with_opassign(node) do - iseq.dup - branchif = iseq.branchif(-1) - iseq.pop - visit(node.value) - end - - if node.target.is_a?(ARefField) - iseq.leave - branchif.patch!(iseq) - iseq.setn(3) - iseq.adjuststack(3) - else - branchif.patch!(iseq) - end - end - else - with_opassign(node) do - visit(node.value) - iseq.send(operator, 1, flag) - end - end - end - - def visit_params(node) - argument_options = iseq.argument_options - - if node.requireds.any? - argument_options[:lead_num] = 0 - - node.requireds.each do |required| - iseq.local_table.plain(required.value.to_sym) - iseq.argument_size += 1 - argument_options[:lead_num] += 1 - end - end - - node.optionals.each do |(optional, value)| - index = iseq.local_table.size - name = optional.value.to_sym - - iseq.local_table.plain(name) - iseq.argument_size += 1 - - argument_options[:opt] = [iseq.label] unless argument_options.key?(:opt) - - visit(value) - iseq.setlocal(index, 0) - iseq.argument_options[:opt] << iseq.label - end - - visit(node.rest) if node.rest - - if node.posts.any? - argument_options[:post_start] = iseq.argument_size - argument_options[:post_num] = 0 - - node.posts.each do |post| - iseq.local_table.plain(post.value.to_sym) - iseq.argument_size += 1 - argument_options[:post_num] += 1 - end - end - - if node.keywords.any? - argument_options[:kwbits] = 0 - argument_options[:keyword] = [] - - keyword_bits_name = node.keyword_rest ? 
3 : 2 - iseq.argument_size += 1 - keyword_bits_index = iseq.local_table.locals.size + node.keywords.size - - node.keywords.each_with_index do |(keyword, value), keyword_index| - name = keyword.value.chomp(":").to_sym - index = iseq.local_table.size - - iseq.local_table.plain(name) - iseq.argument_size += 1 - argument_options[:kwbits] += 1 - - if value.nil? - argument_options[:keyword] << name - elsif (compiled = RubyVisitor.compile(value)) - argument_options[:keyword] << [name, compiled] - else - argument_options[:keyword] << [name] - iseq.checkkeyword(keyword_bits_index, keyword_index) - branchif = iseq.branchif(-1) - visit(value) - iseq.setlocal(index, 0) - branchif.patch!(iseq) - end - end - - iseq.local_table.plain(keyword_bits_name) - end - - if node.keyword_rest.is_a?(ArgsForward) - iseq.local_table.plain(:*) - iseq.local_table.plain(:&) - - iseq.argument_options[:rest_start] = iseq.argument_size - iseq.argument_options[:block_start] = iseq.argument_size + 1 - - iseq.argument_size += 2 - elsif node.keyword_rest - visit(node.keyword_rest) - end - - visit(node.block) if node.block - end - - def visit_paren(node) - visit(node.contents) - end - - def visit_program(node) - node.statements.body.each do |statement| - break unless statement.is_a?(Comment) - - if statement.value == "# frozen_string_literal: true" - @frozen_string_literal = true - end - end - - preexes = [] - statements = [] - - node.statements.body.each do |statement| - case statement - when Comment, EmbDoc, EndContent, VoidStmt - # ignore - when BEGINBlock - preexes << statement - else - statements << statement - end - end - - top_iseq = - YARV::InstructionSequence.new( - :top, - "", - nil, - node.location, - frozen_string_literal: frozen_string_literal, - operands_unification: operands_unification, - specialized_instruction: specialized_instruction - ) - - with_child_iseq(top_iseq) do - visit_all(preexes) - - if statements.empty? 
- iseq.putnil - else - *statements, last_statement = statements - visit_all(statements) - with_last_statement { visit(last_statement) } - end - - iseq.leave - end - end - - def visit_qsymbols(node) - iseq.duparray(node.accept(RubyVisitor.new)) - end - - def visit_qwords(node) - if frozen_string_literal - iseq.duparray(node.accept(RubyVisitor.new)) - else - visit_all(node.elements) - iseq.newarray(node.elements.length) - end - end - - def visit_range(node) - if (compiled = RubyVisitor.compile(node)) - iseq.putobject(compiled) - else - visit(node.left) - visit(node.right) - iseq.newrange(node.operator.value == ".." ? 0 : 1) - end - end - - def visit_rassign(node) - iseq.putnil - - if node.operator.is_a?(Kw) - jumps = [] - - visit(node.value) - iseq.dup - - case node.pattern - when VarField - lookup = visit(node.pattern) - iseq.setlocal(lookup.index, lookup.level) - jumps << iseq.jump(-1) - else - jumps.concat(visit(node.pattern)) - end - - iseq.pop - iseq.pop - iseq.putobject(false) - iseq.leave - - jumps.each { |jump| jump[1] = iseq.label } - iseq.adjuststack(2) - iseq.putobject(true) - else - jumps_to_match = [] - - iseq.putnil - iseq.putobject(false) - iseq.putnil - iseq.putnil - visit(node.value) - iseq.dup - - # Visit the pattern. If it matches, - case node.pattern - when VarField - lookup = visit(node.pattern) - iseq.setlocal(lookup.index, lookup.level) - jumps_to_match << iseq.jump(-1) - else - jumps_to_match.concat(visit(node.pattern)) - end - - # First we're going to push the core onto the stack, then we'll check if - # the value to match is truthy. If it is, we'll jump down to raise - # NoMatchingPatternKeyError. Otherwise we'll raise - # NoMatchingPatternError. - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) - iseq.topn(4) - branchif_no_key = iseq.branchif(-1) - - # Here we're going to raise NoMatchingPatternError. 
- iseq.putobject(NoMatchingPatternError) - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) - iseq.putobject("%p: %s") - iseq.topn(4) - iseq.topn(7) - iseq.send(:"core#sprintf", 3) - iseq.send(:"core#raise", 2) - jump_to_exit = iseq.jump(-1) - - # Here we're going to raise NoMatchingPatternKeyError. - branchif_no_key.patch!(iseq) - iseq.putobject(NoMatchingPatternKeyError) - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) - iseq.putobject("%p: %s") - iseq.topn(4) - iseq.topn(7) - iseq.send(:"core#sprintf", 3) - iseq.topn(7) - iseq.topn(9) - - # Super special behavior here because of the weird kw_arg handling. - iseq.stack.change_by(-(1 + 1) + 1) - call_data = { mid: :new, flag: YARV::VM_CALL_KWARG, orig_argc: 1, kw_arg: [:matchee, :key] } - - if specialized_instruction - iseq.push([:opt_send_without_block, call_data]) - else - iseq.push([:send, call_data, nil]) - end - - iseq.send(:"core#raise", 1) - - # This runs when the pattern fails to match. - jump_to_exit[1] = iseq.label - iseq.adjuststack(7) - iseq.putnil - iseq.leave - - # This runs when the pattern matches successfully. 
- jumps_to_match.each { |jump| jump[1] = iseq.label } - iseq.adjuststack(6) - iseq.putnil - end - end - - def visit_rational(node) - iseq.putobject(node.accept(RubyVisitor.new)) - end - - def visit_regexp_literal(node) - if (compiled = RubyVisitor.compile(node)) - iseq.putobject(compiled) - else - flags = RubyVisitor.new.visit_regexp_literal_flags(node) - length = visit_string_parts(node) - iseq.toregexp(flags, length) - end - end - - def visit_rest_param(node) - iseq.local_table.plain(node.name.value.to_sym) - iseq.argument_options[:rest_start] = iseq.argument_size - iseq.argument_size += 1 - end - - def visit_sclass(node) - visit(node.target) - iseq.putnil - - singleton_iseq = - with_child_iseq(iseq.singleton_class_child_iseq(node.location)) do - iseq.event(:RUBY_EVENT_CLASS) - visit(node.bodystmt) - iseq.event(:RUBY_EVENT_END) - iseq.leave - end - - iseq.defineclass( - :singletonclass, - singleton_iseq, - YARV::DefineClass::TYPE_SINGLETON_CLASS - ) - end - - def visit_statements(node) - statements = - node.body.select do |statement| - case statement - when Comment, EmbDoc, EndContent, VoidStmt - false - else - true - end - end - - statements.empty? ? 
iseq.putnil : visit_all(statements) - end - - def visit_string_concat(node) - value = node.left.parts.first.value + node.right.parts.first.value - - visit_string_literal( - StringLiteral.new( - parts: [TStringContent.new(value: value, location: node.location)], - quote: node.left.quote, - location: node.location - ) - ) - end - - def visit_string_embexpr(node) - visit(node.statements) - end - - def visit_string_literal(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - visit(node.parts.first) - else - length = visit_string_parts(node) - iseq.concatstrings(length) - end - end - - def visit_super(node) - iseq.putself - visit(node.arguments) - iseq.invokesuper( - nil, - argument_parts(node.arguments).length, - YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE | YARV::VM_CALL_SUPER, - nil - ) - end - - def visit_symbol_literal(node) - iseq.putobject(node.accept(RubyVisitor.new)) - end - - def visit_symbols(node) - if (compiled = RubyVisitor.compile(node)) - iseq.duparray(compiled) - else - node.elements.each do |element| - if element.parts.length == 1 && - element.parts.first.is_a?(TStringContent) - iseq.putobject(element.parts.first.value.to_sym) - else - length = visit_string_parts(element) - iseq.concatstrings(length) - iseq.intern - end - end - - iseq.newarray(node.elements.length) - end - end - - def visit_top_const_ref(node) - iseq.opt_getconstant_path(constant_names(node)) - end - - def visit_tstring_content(node) - if frozen_string_literal - iseq.putobject(node.accept(RubyVisitor.new)) - else - iseq.putstring(node.accept(RubyVisitor.new)) - end - end - - def visit_unary(node) - method_id = - case node.operator - when "+", "-" - "#{node.operator}@" - else - node.operator - end - - visit_call( - CommandCall.new( - receiver: node.statement, - operator: nil, - message: Ident.new(value: method_id, location: Location.default), - arguments: nil, - block: nil, - location: Location.default - ) - ) - end - - def visit_undef(node) - 
node.symbols.each_with_index do |symbol, index| - iseq.pop if index != 0 - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CBASE) - visit(symbol) - iseq.send(:"core#undef_method", 2) - end - end - - def visit_unless(node) - visit(node.predicate) - branchunless = iseq.branchunless(-1) - node.consequent ? visit(node.consequent) : iseq.putnil - - if last_statement? - iseq.leave - branchunless.patch!(iseq) - - visit(node.statements) - else - iseq.pop - - if node.consequent - jump = iseq.jump(-1) - branchunless.patch!(iseq) - visit(node.consequent) - jump[1] = iseq.label - else - branchunless.patch!(iseq) - end - end - end - - def visit_until(node) - jumps = [] - - jumps << iseq.jump(-1) - iseq.putnil - iseq.pop - jumps << iseq.jump(-1) - - label = iseq.label - visit(node.statements) - iseq.pop - jumps.each { |jump| jump[1] = iseq.label } - - visit(node.predicate) - iseq.branchunless(label) - iseq.putnil if last_statement? - end - - def visit_var_field(node) - case node.value - when CVar, IVar - name = node.value.value.to_sym - iseq.inline_storage_for(name) - when Ident - name = node.value.value.to_sym - - if (local_variable = iseq.local_variable(name)) - local_variable - else - iseq.local_table.plain(name) - iseq.local_variable(name) - end - end - end - - def visit_var_ref(node) - case node.value - when Const - iseq.opt_getconstant_path(constant_names(node)) - when CVar - name = node.value.value.to_sym - iseq.getclassvariable(name) - when GVar - iseq.getglobal(node.value.value.to_sym) - when Ident - lookup = iseq.local_variable(node.value.value.to_sym) - - case lookup.local - when YARV::LocalTable::BlockLocal - iseq.getblockparam(lookup.index, lookup.level) - when YARV::LocalTable::PlainLocal - iseq.getlocal(lookup.index, lookup.level) - end - when IVar - name = node.value.value.to_sym - iseq.getinstancevariable(name) - when Kw - case node.value.value - when "false" - iseq.putobject(false) - when "nil" - iseq.putnil 
- when "self" - iseq.putself - when "true" - iseq.putobject(true) - end - end - end - - def visit_vcall(node) - iseq.putself - - flag = - YARV::VM_CALL_FCALL | YARV::VM_CALL_VCALL | YARV::VM_CALL_ARGS_SIMPLE - iseq.send(node.value.value.to_sym, 0, flag) - end - - def visit_when(node) - visit(node.statements) - end - - def visit_while(node) - jumps = [] - - jumps << iseq.jump(-1) - iseq.putnil - iseq.pop - jumps << iseq.jump(-1) - - label = iseq.label - visit(node.statements) - iseq.pop - jumps.each { |jump| jump[1] = iseq.label } - - visit(node.predicate) - iseq.branchif(label) - iseq.putnil if last_statement? - end - - def visit_word(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - visit(node.parts.first) - else - length = visit_string_parts(node) - iseq.concatstrings(length) - end - end - - def visit_words(node) - if frozen_string_literal && (compiled = RubyVisitor.compile(node)) - iseq.duparray(compiled) - else - visit_all(node.elements) - iseq.newarray(node.elements.length) - end - end - - def visit_xstring_literal(node) - iseq.putself - length = visit_string_parts(node) - iseq.concatstrings(node.parts.length) if length > 1 - iseq.send(:`, 1, YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE) - end - - def visit_yield(node) - parts = argument_parts(node.arguments) - visit_all(parts) - iseq.invokeblock(nil, parts.length) - end - - def visit_zsuper(_node) - iseq.putself - iseq.invokesuper( - nil, - 0, - YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE | YARV::VM_CALL_SUPER | - YARV::VM_CALL_ZSUPER, - nil - ) - end - - private - - # This is a helper that is used in places where arguments may be present - # or they may be wrapped in parentheses. It's meant to descend down the - # tree and return an array of argument nodes. 
- def argument_parts(node) - case node - when nil - [] - when Args - node.parts - when ArgParen - if node.arguments.is_a?(ArgsForward) - [node.arguments] - else - node.arguments.parts - end - when Paren - node.contents.parts - end - end - - # Constant names when they are being assigned or referenced come in as a - # tree, but it's more convenient to work with them as an array. This - # method converts them into that array. This is nice because it's the - # operand that goes to opt_getconstant_path in Ruby 3.2. - def constant_names(node) - current = node - names = [] - - while current.is_a?(ConstPathField) || current.is_a?(ConstPathRef) - names.unshift(current.constant.value.to_sym) - current = current.parent - end - - case current - when VarField, VarRef - names.unshift(current.value.value.to_sym) - when TopConstRef - names.unshift(current.constant.value.to_sym) - names.unshift(:"") - end - - names - end - - # For the most part when an OpAssign (operator assignment) node with a ||= - # operator is being compiled it's a matter of reading the target, checking - # if the value should be evaluated, evaluating it if so, and then writing - # the result back to the target. - # - # However, in certain kinds of assignments (X, ::X, X::Y, @@x, and $x) we - # first check if the value is defined using the defined instruction. I - # don't know why it is necessary, and suspect that it isn't. 
- def opassign_defined(node) - case node.target - when ConstPathField - visit(node.target.parent) - name = node.target.constant.value.to_sym - - iseq.dup - iseq.defined(YARV::Defined::TYPE_CONST_FROM, name, true) - when TopConstField - name = node.target.constant.value.to_sym - - iseq.putobject(Object) - iseq.dup - iseq.defined(YARV::Defined::TYPE_CONST_FROM, name, true) - when VarField - name = node.target.value.value.to_sym - iseq.putnil - - case node.target.value - when Const - iseq.defined(YARV::Defined::TYPE_CONST, name, true) - when CVar - iseq.defined(YARV::Defined::TYPE_CVAR, name, true) - when GVar - iseq.defined(YARV::Defined::TYPE_GVAR, name, true) - end - end - - branchunless = iseq.branchunless(-1) - - case node.target - when ConstPathField, TopConstField - iseq.dup - iseq.putobject(true) - iseq.getconstant(name) - when VarField - case node.target.value - when Const - iseq.opt_getconstant_path(constant_names(node.target)) - when CVar - iseq.getclassvariable(name) - when GVar - iseq.getglobal(name) - end - end - - iseq.dup - branchif = iseq.branchif(-1) - iseq.pop - - branchunless.patch!(iseq) - visit(node.value) - - case node.target - when ConstPathField, TopConstField - iseq.dupn(2) - iseq.swap - iseq.setconstant(name) - when VarField - iseq.dup - - case node.target.value - when Const - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) - iseq.setconstant(name) - when CVar - iseq.setclassvariable(name) - when GVar - iseq.setglobal(name) - end - end - - branchif.patch!(iseq) - end - - # Whenever a value is interpolated into a string-like structure, these - # three instructions are pushed. - def push_interpolate - iseq.dup - iseq.objtostring( - :to_s, - 0, - YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE - ) - iseq.anytostring - end - - # There are a lot of nodes in the AST that act as contains of parts of - # strings. This includes things like string literals, regular expressions, - # heredocs, etc. 
This method will visit all the parts of a string within - # those containers. - def visit_string_parts(node) - length = 0 - - unless node.parts.first.is_a?(TStringContent) - iseq.putobject("") - length += 1 - end - - node.parts.each do |part| - case part - when StringDVar - visit(part.variable) - push_interpolate - when StringEmbExpr - visit(part) - push_interpolate - when TStringContent - iseq.putobject(part.accept(RubyVisitor.new)) - end - - length += 1 - end - - length - end - - # The current instruction sequence that we're compiling is always stored - # on the compiler. When we descend into a node that has its own - # instruction sequence, this method can be called to temporarily set the - # new value of the instruction sequence, yield, and then set it back. - def with_child_iseq(child_iseq) - parent_iseq = iseq - - begin - @iseq = child_iseq - yield - child_iseq - ensure - @iseq = parent_iseq - end - end - - # When we're compiling the last statement of a set of statements within a - # scope, the instructions sometimes change from pops to leaves. These - # kinds of peephole optimizations can reduce the overall number of - # instructions. Therefore, we keep track of whether we're compiling the - # last statement of a scope and allow visit methods to query that - # information. - def with_last_statement - previous = @last_statement - @last_statement = true - - begin - yield - ensure - @last_statement = previous - end - end - - def last_statement? - @last_statement - end - - # OpAssign nodes can have a number of different kinds of nodes as their - # "target" (i.e., the left-hand side of the assignment). When compiling - # these nodes we typically need to first fetch the current value of the - # variable, then perform some kind of action, then store the result back - # into the variable. This method handles that by first fetching the value, - # then yielding to the block, then storing the result. 
- def with_opassign(node) - case node.target - when ARefField - iseq.putnil - visit(node.target.collection) - visit(node.target.index) - - iseq.dupn(2) - iseq.send(:[], 1) - - yield - - iseq.setn(3) - iseq.send(:[]=, 2) - iseq.pop - when ConstPathField - name = node.target.constant.value.to_sym - - visit(node.target.parent) - iseq.dup - iseq.putobject(true) - iseq.getconstant(name) - - yield - - if node.operator.value == "&&=" - iseq.dupn(2) - else - iseq.swap - iseq.topn(1) - end - - iseq.swap - iseq.setconstant(name) - when TopConstField - name = node.target.constant.value.to_sym - - iseq.putobject(Object) - iseq.dup - iseq.putobject(true) - iseq.getconstant(name) - - yield - - if node.operator.value == "&&=" - iseq.dupn(2) - else - iseq.swap - iseq.topn(1) - end - - iseq.swap - iseq.setconstant(name) - when VarField - case node.target.value - when Const - names = constant_names(node.target) - iseq.opt_getconstant_path(names) - - yield - - iseq.dup - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) - iseq.setconstant(names.last) - when CVar - name = node.target.value.value.to_sym - iseq.getclassvariable(name) - - yield - - iseq.dup - iseq.setclassvariable(name) - when GVar - name = node.target.value.value.to_sym - iseq.getglobal(name) - - yield - - iseq.dup - iseq.setglobal(name) - when Ident - local_variable = visit(node.target) - iseq.getlocal(local_variable.index, local_variable.level) - - yield - - iseq.dup - iseq.setlocal(local_variable.index, local_variable.level) - when IVar - name = node.target.value.value.to_sym - iseq.getinstancevariable(name) - - yield - - iseq.dup - iseq.setinstancevariable(name) - end - end - end - end -end diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index 89920c6a..df8bc3ce 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -1,854 +1,11 @@ # frozen_string_literal: true module SyntaxTree + # This module provides an object representation of the YARV bytecode. 
module YARV - # This object is used to track the size of the stack at any given time. It - # is effectively a mini symbolic interpreter. It's necessary because when - # instruction sequences get serialized they include a :stack_max field on - # them. This field is used to determine how much stack space to allocate - # for the instruction sequence. - class Stack - attr_reader :current_size, :maximum_size - - def initialize - @current_size = 0 - @maximum_size = 0 - end - - def change_by(value) - @current_size += value - @maximum_size = @current_size if @current_size > @maximum_size - end + # Compile the given source into a YARV instruction sequence. + def self.compile(source, **options) + SyntaxTree.parse(source).accept(Compiler.new(**options)) end - - # This represents every local variable associated with an instruction - # sequence. There are two kinds of locals: plain locals that are what you - # expect, and block proxy locals, which represent local variables - # associated with blocks that were passed into the current instruction - # sequence. - class LocalTable - # A local representing a block passed into the current instruction - # sequence. - class BlockLocal - attr_reader :name - - def initialize(name) - @name = name - end - end - - # A regular local variable. - class PlainLocal - attr_reader :name - - def initialize(name) - @name = name - end - end - - # The result of looking up a local variable in the current local table. - class Lookup - attr_reader :local, :index, :level - - def initialize(local, index, level) - @local = local - @index = index - @level = level - end - end - - attr_reader :locals - - def initialize - @locals = [] - end - - def find(name, level) - index = locals.index { |local| local.name == name } - Lookup.new(locals[index], index, level) if index - end - - def has?(name) - locals.any? { |local| local.name == name } - end - - def names - locals.map(&:name) - end - - def size - locals.length - end - - # Add a BlockLocal to the local table. 
- def block(name) - locals << BlockLocal.new(name) unless has?(name) - end - - # Add a PlainLocal to the local table. - def plain(name) - locals << PlainLocal.new(name) unless has?(name) - end - - # This is the offset from the top of the stack where this local variable - # lives. - def offset(index) - size - (index - 3) - 1 - end - end - - # This class is meant to mirror RubyVM::InstructionSequence. It contains a - # list of instructions along with the metadata pertaining to them. It also - # functions as a builder for the instruction sequence. - class InstructionSequence - MAGIC = "YARVInstructionSequence/SimpleDataFormat" - - # This provides a handle to the rb_iseq_load function, which allows you to - # pass a serialized iseq to Ruby and have it return a - # RubyVM::InstructionSequence object. - ISEQ_LOAD = - Fiddle::Function.new( - Fiddle::Handle::DEFAULT["rb_iseq_load"], - [Fiddle::TYPE_VOIDP] * 3, - Fiddle::TYPE_VOIDP - ) - - # The type of the instruction sequence. - attr_reader :type - - # The name of the instruction sequence. - attr_reader :name - - # The parent instruction sequence, if there is one. - attr_reader :parent_iseq - - # The location of the root node of this instruction sequence. - attr_reader :location - - # This is the list of information about the arguments to this - # instruction sequence. - attr_accessor :argument_size - attr_reader :argument_options - - # The list of instructions for this instruction sequence. - attr_reader :insns - - # The table of local variables. - attr_reader :local_table - - # The hash of names of instance and class variables pointing to the - # index of their associated inline storage. - attr_reader :inline_storages - - # The index of the next inline storage that will be created. - attr_reader :storage_index - - # An object that will track the current size of the stack and the - # maximum size of the stack for this instruction sequence. - attr_reader :stack - - # These are various compilation options provided. 
- attr_reader :frozen_string_literal, - :operands_unification, - :specialized_instruction - - def initialize( - type, - name, - parent_iseq, - location, - frozen_string_literal: false, - operands_unification: true, - specialized_instruction: true - ) - @type = type - @name = name - @parent_iseq = parent_iseq - @location = location - - @argument_size = 0 - @argument_options = {} - - @local_table = LocalTable.new - @inline_storages = {} - @insns = [] - @storage_index = 0 - @stack = Stack.new - - @frozen_string_literal = frozen_string_literal - @operands_unification = operands_unification - @specialized_instruction = specialized_instruction - end - - ########################################################################## - # Query methods - ########################################################################## - - def local_variable(name, level = 0) - if (lookup = local_table.find(name, level)) - lookup - elsif parent_iseq - parent_iseq.local_variable(name, level + 1) - end - end - - def inline_storage - storage = storage_index - @storage_index += 1 - storage - end - - def inline_storage_for(name) - inline_storages[name] = inline_storage unless inline_storages.key?(name) - - inline_storages[name] - end - - def length - insns.inject(0) do |sum, insn| - case insn - when Integer, Symbol - sum - else - sum + insn.length - end - end - end - - def eval - compiled = to_a - - # Temporary hack until we get these working. 
- compiled[4][:node_id] = 11 - compiled[4][:node_ids] = [1, 0, 3, 2, 6, 7, 9, -1] - - Fiddle.dlunwrap(ISEQ_LOAD.call(Fiddle.dlwrap(compiled), 0, nil)).eval - end - - def to_a - versions = RUBY_VERSION.split(".").map(&:to_i) - - [ - MAGIC, - versions[0], - versions[1], - 1, - { - arg_size: argument_size, - local_size: local_table.size, - stack_max: stack.maximum_size - }, - name, - "", - "", - location.start_line, - type, - local_table.names, - argument_options, - [], - insns.map do |insn| - case insn - when Integer, Symbol - insn - when Array - case insn[0] - when :setlocal_WC_0, :setlocal_WC_1, :setlocal, :setblockparam - iseq = self - - case insn[0] - when :setlocal_WC_1 - iseq = iseq.parent_iseq - when :setlocal, :setblockparam - insn[2].times { iseq = iseq.parent_iseq } - end - - # Here we need to map the local variable index to the offset - # from the top of the stack where it will be stored. - [insn[0], iseq.local_table.offset(insn[1]), *insn[2..]] - when :send - # For any instructions that push instruction sequences onto the - # stack, we need to call #to_a on them as well. 
- [insn[0], insn[1], (insn[2].to_a if insn[2])] - when :once - [insn[0], insn[1].to_a, insn[2]] - else - insn - end - else - insn.to_a(self) - end - end - ] - end - - ########################################################################## - # Child instruction sequence methods - ########################################################################## - - def child_iseq(type, name, location) - InstructionSequence.new( - type, - name, - self, - location, - frozen_string_literal: frozen_string_literal, - operands_unification: operands_unification, - specialized_instruction: specialized_instruction - ) - end - - def block_child_iseq(location) - current = self - current = current.parent_iseq while current.type == :block - child_iseq(:block, "block in #{current.name}", location) - end - - def class_child_iseq(name, location) - child_iseq(:class, "", location) - end - - def method_child_iseq(name, location) - child_iseq(:method, name, location) - end - - def module_child_iseq(name, location) - child_iseq(:class, "", location) - end - - def singleton_class_child_iseq(location) - child_iseq(:class, "singleton class", location) - end - - ########################################################################## - # Instruction push methods - ########################################################################## - - def push(insn) - insns << insn - - case insn - when Integer, Symbol, Array - insn - else - stack.change_by(-insn.pops + insn.pushes) - insn - end - end - - # This creates a new label at the current length of the instruction - # sequence. It is used as the operand for jump instructions. - def label - name = :"label_#{length}" - insns.last == name ? 
name : event(name) - end - - def event(name) - push(name) - end - - def adjuststack(number) - push(AdjustStack.new(number)) - end - - def anytostring - push(AnyToString.new) - end - - def branchif(label) - push(BranchIf.new(label)) - end - - def branchnil(label) - push(BranchNil.new(label)) - end - - def branchunless(label) - push(BranchUnless.new(label)) - end - - def checkkeyword(keyword_bits_index, keyword_index) - push(CheckKeyword.new(keyword_bits_index, keyword_index)) - end - - def checkmatch(flag) - stack.change_by(-2 + 1) - push([:checkmatch, flag]) - end - - def checktype(type) - stack.change_by(-1 + 2) - push([:checktype, type]) - end - - def concatarray - push(ConcatArray.new) - end - - def concatstrings(number) - push(ConcatStrings.new(number)) - end - - def defined(type, name, message) - push(Defined.new(type, name, message)) - end - - def defineclass(name, class_iseq, flags) - push(DefineClass.new(name, class_iseq, flags)) - end - - def definemethod(name, method_iseq) - push(DefineMethod.new(name, method_iseq)) - end - - def definesmethod(name, method_iseq) - push(DefineSMethod.new(name, method_iseq)) - end - - def dup - push(Dup.new) - end - - def duparray(object) - push(DupArray.new(object)) - end - - def duphash(object) - push(DupHash.new(object)) - end - - def dupn(number) - push(DupN.new(number)) - end - - def expandarray(length, flags) - push(ExpandArray.new(length, flags)) - end - - def getblockparam(index, level) - push(GetBlockParam.new(index, level)) - end - - def getblockparamproxy(index, level) - push(GetBlockParamProxy.new(index, level)) - end - - def getclassvariable(name) - if RUBY_VERSION < "3.0" - push(Legacy::GetClassVariable.new(name)) - else - push(GetClassVariable.new(name, inline_storage_for(name))) - end - end - - def getconstant(name) - push(Legacy::GetConstant.new(name)) - end - - def getglobal(name) - push(GetGlobal.new(name)) - end - - def getinstancevariable(name) - if RUBY_VERSION < "3.2" - 
push(GetInstanceVariable.new(name, inline_storage_for(name))) - else - push(GetInstanceVariable.new(name, inline_storage)) - end - end - - def getlocal(index, level) - if operands_unification - # Specialize the getlocal instruction based on the level of the - # local variable. If it's 0 or 1, then there's a specialized - # instruction that will look at the current scope or the parent - # scope, respectively, and requires fewer operands. - case level - when 0 - push(GetLocalWC0.new(index)) - when 1 - push(GetLocalWC1.new(index)) - else - push(GetLocal.new(index, level)) - end - else - push(GetLocal.new(index, level)) - end - end - - def getspecial(key, type) - stack.change_by(-0 + 1) - push([:getspecial, key, type]) - end - - def intern - stack.change_by(-1 + 1) - push([:intern]) - end - - def invokeblock(method_id, argc, flag = VM_CALL_ARGS_SIMPLE) - stack.change_by(-argc + 1) - push([:invokeblock, call_data(method_id, argc, flag)]) - end - - def invokesuper(method_id, argc, flag, block_iseq) - stack.change_by(-(argc + 1) + 1) - - cdata = call_data(method_id, argc, flag) - push([:invokesuper, cdata, block_iseq]) - end - - def jump(index) - stack.change_by(0) - push([:jump, index]) - end - - def leave - stack.change_by(-1) - push([:leave]) - end - - def newarray(length) - stack.change_by(-length + 1) - push([:newarray, length]) - end - - def newarraykwsplat(length) - stack.change_by(-length + 1) - push([:newarraykwsplat, length]) - end - - def newhash(length) - stack.change_by(-length + 1) - push([:newhash, length]) - end - - def newrange(flag) - stack.change_by(-2 + 1) - push([:newrange, flag]) - end - - def nop - stack.change_by(0) - push([:nop]) - end - - def objtostring(method_id, argc, flag) - stack.change_by(-1 + 1) - push([:objtostring, call_data(method_id, argc, flag)]) - end - - def once(postexe_iseq, inline_storage) - stack.change_by(+1) - push([:once, postexe_iseq, inline_storage]) - end - - def opt_aref_with(object, method_id, argc, flag = 
VM_CALL_ARGS_SIMPLE) - stack.change_by(-1 + 1) - push([:opt_aref_with, object, call_data(method_id, argc, flag)]) - end - - def opt_aset_with(object, method_id, argc, flag = VM_CALL_ARGS_SIMPLE) - stack.change_by(-2 + 1) - push([:opt_aset_with, object, call_data(method_id, argc, flag)]) - end - - def opt_getconstant_path(names) - if RUBY_VERSION >= "3.2" - stack.change_by(+1) - push([:opt_getconstant_path, names]) - else - const_inline_storage = inline_storage - getinlinecache = opt_getinlinecache(-1, const_inline_storage) - - if names[0] == :"" - names.shift - pop - putobject(Object) - end - - names.each_with_index do |name, index| - putobject(index == 0) - getconstant(name) - end - - opt_setinlinecache(const_inline_storage) - getinlinecache[1] = label - end - end - - def opt_getinlinecache(offset, inline_storage) - stack.change_by(+1) - push([:opt_getinlinecache, offset, inline_storage]) - end - - def opt_newarray_max(length) - if specialized_instruction - stack.change_by(-length + 1) - push([:opt_newarray_max, length]) - else - newarray(length) - send(:max, 0) - end - end - - def opt_newarray_min(length) - if specialized_instruction - stack.change_by(-length + 1) - push([:opt_newarray_min, length]) - else - newarray(length) - send(:min, 0) - end - end - - def opt_setinlinecache(inline_storage) - stack.change_by(-1 + 1) - push([:opt_setinlinecache, inline_storage]) - end - - def opt_str_freeze(value) - if specialized_instruction - stack.change_by(+1) - push([:opt_str_freeze, value, call_data(:freeze, 0)]) - else - putstring(value) - send(:freeze, 0) - end - end - - def opt_str_uminus(value) - if specialized_instruction - stack.change_by(+1) - push([:opt_str_uminus, value, call_data(:-@, 0)]) - else - putstring(value) - send(:-@, 0) - end - end - - def pop - stack.change_by(-1) - push([:pop]) - end - - def putnil - stack.change_by(+1) - push([:putnil]) - end - - def putobject(object) - stack.change_by(+1) - - if operands_unification - # Specialize the putobject 
instruction based on the value of the - # object. If it's 0 or 1, then there's a specialized instruction - # that will push the object onto the stack and requires fewer - # operands. - if object.eql?(0) - push([:putobject_INT2FIX_0_]) - elsif object.eql?(1) - push([:putobject_INT2FIX_1_]) - else - push([:putobject, object]) - end - else - push([:putobject, object]) - end - end - - def putself - stack.change_by(+1) - push([:putself]) - end - - def putspecialobject(object) - stack.change_by(+1) - push([:putspecialobject, object]) - end - - def putstring(object) - stack.change_by(+1) - push([:putstring, object]) - end - - def send(method_id, argc, flag = VM_CALL_ARGS_SIMPLE, block_iseq = nil) - stack.change_by(-(argc + 1) + 1) - cdata = call_data(method_id, argc, flag) - - if specialized_instruction - # Specialize the send instruction. If it doesn't have a block - # attached, then we will replace it with an opt_send_without_block - # and do further specializations based on the called method and the - # number of arguments. 
- - # stree-ignore - if !block_iseq && (flag & VM_CALL_ARGS_BLOCKARG) == 0 - case [method_id, argc] - when [:length, 0] then push([:opt_length, cdata]) - when [:size, 0] then push([:opt_size, cdata]) - when [:empty?, 0] then push([:opt_empty_p, cdata]) - when [:nil?, 0] then push([:opt_nil_p, cdata]) - when [:succ, 0] then push([:opt_succ, cdata]) - when [:!, 0] then push([:opt_not, cdata]) - when [:+, 1] then push([:opt_plus, cdata]) - when [:-, 1] then push([:opt_minus, cdata]) - when [:*, 1] then push([:opt_mult, cdata]) - when [:/, 1] then push([:opt_div, cdata]) - when [:%, 1] then push([:opt_mod, cdata]) - when [:==, 1] then push([:opt_eq, cdata]) - when [:=~, 1] then push([:opt_regexpmatch2, cdata]) - when [:<, 1] then push([:opt_lt, cdata]) - when [:<=, 1] then push([:opt_le, cdata]) - when [:>, 1] then push([:opt_gt, cdata]) - when [:>=, 1] then push([:opt_ge, cdata]) - when [:<<, 1] then push([:opt_ltlt, cdata]) - when [:[], 1] then push([:opt_aref, cdata]) - when [:&, 1] then push([:opt_and, cdata]) - when [:|, 1] then push([:opt_or, cdata]) - when [:[]=, 2] then push([:opt_aset, cdata]) - when [:!=, 1] - push([:opt_neq, call_data(:==, 1), cdata]) - else - push([:opt_send_without_block, cdata]) - end - else - push([:send, cdata, block_iseq]) - end - else - push([:send, cdata, block_iseq]) - end - end - - def setblockparam(index, level) - stack.change_by(-1) - push([:setblockparam, index, level]) - end - - def setclassvariable(name) - stack.change_by(-1) - - if RUBY_VERSION >= "3.0" - push([:setclassvariable, name, inline_storage_for(name)]) - else - push([:setclassvariable, name]) - end - end - - def setconstant(name) - stack.change_by(-2) - push([:setconstant, name]) - end - - def setglobal(name) - stack.change_by(-1) - push([:setglobal, name]) - end - - def setinstancevariable(name) - stack.change_by(-1) - - if RUBY_VERSION >= "3.2" - push([:setinstancevariable, name, inline_storage]) - else - push([:setinstancevariable, name, 
inline_storage_for(name)]) - end - end - - def setlocal(index, level) - stack.change_by(-1) - - if operands_unification - # Specialize the setlocal instruction based on the level of the - # local variable. If it's 0 or 1, then there's a specialized - # instruction that will write to the current scope or the parent - # scope, respectively, and requires fewer operands. - case level - when 0 - push([:setlocal_WC_0, index]) - when 1 - push([:setlocal_WC_1, index]) - else - push([:setlocal, index, level]) - end - else - push([:setlocal, index, level]) - end - end - - def setn(number) - stack.change_by(-1 + 1) - push([:setn, number]) - end - - def setspecial(key) - stack.change_by(-1) - push([:setspecial, key]) - end - - def splatarray(flag) - stack.change_by(-1 + 1) - push([:splatarray, flag]) - end - - def swap - stack.change_by(-2 + 2) - push([:swap]) - end - - def topn(number) - stack.change_by(+1) - push([:topn, number]) - end - - def toregexp(options, length) - stack.change_by(-length + 1) - push([:toregexp, options, length]) - end - - private - - # This creates a call data object that is used as the operand for the - # send, invokesuper, and objtostring instructions. - def call_data(method_id, argc, flag = VM_CALL_ARGS_SIMPLE) - { mid: method_id, flag: flag, orig_argc: argc } - end - end - - # These constants correspond to the putspecialobject instruction. They are - # used to represent special objects that are pushed onto the stack. - VM_SPECIAL_OBJECT_VMCORE = 1 - VM_SPECIAL_OBJECT_CBASE = 2 - VM_SPECIAL_OBJECT_CONST_BASE = 3 - - # These constants correspond to the flag passed as part of the call data - # structure on the send instruction. They are used to represent various - # metadata about the callsite (e.g., were keyword arguments used?, was a - # block given?, etc.). 
- VM_CALL_ARGS_SPLAT = 1 << 0 - VM_CALL_ARGS_BLOCKARG = 1 << 1 - VM_CALL_FCALL = 1 << 2 - VM_CALL_VCALL = 1 << 3 - VM_CALL_ARGS_SIMPLE = 1 << 4 - VM_CALL_BLOCKISEQ = 1 << 5 - VM_CALL_KWARG = 1 << 6 - VM_CALL_KW_SPLAT = 1 << 7 - VM_CALL_TAILCALL = 1 << 8 - VM_CALL_SUPER = 1 << 9 - VM_CALL_ZSUPER = 1 << 10 - VM_CALL_OPT_SEND = 1 << 11 - VM_CALL_KW_SPLAT_MUT = 1 << 12 - - # These constants correspond to the setspecial instruction. - VM_SVAR_LASTLINE = 0 # $_ - VM_SVAR_BACKREF = 1 # $~ - VM_SVAR_FLIPFLOP_START = 2 # flipflop - - # These constants correspond to the checktype instruction. - VM_CHECKTYPE_ARRAY = 7 - - # These constants correspond to the checkmatch instruction. - VM_CHECKMATCH_TYPE_WHEN = 1 - VM_CHECKMATCH_TYPE_CASE = 2 - VM_CHECKMATCH_TYPE_RESCUE = 3 end end diff --git a/lib/syntax_tree/yarv/bf.rb b/lib/syntax_tree/yarv/bf.rb index 05c05705..0fb27f7e 100644 --- a/lib/syntax_tree/yarv/bf.rb +++ b/lib/syntax_tree/yarv/bf.rb @@ -20,7 +20,7 @@ def compile iseq.setglobal(:$tape) iseq.getglobal(:$tape) iseq.putobject(0) - iseq.send(:default=, 1) + iseq.send(YARV.calldata(:default=, 1)) # Set up the $cursor global variable that will hold the current position # in the tape. 
@@ -99,17 +99,17 @@ def change_by(iseq, value) iseq.getglobal(:$tape) iseq.getglobal(:$cursor) - iseq.send(:[], 1) + iseq.send(YARV.calldata(:[], 1)) if value < 0 iseq.putobject(-value) - iseq.send(:-, 1) + iseq.send(YARV.calldata(:-, 1)) else iseq.putobject(value) - iseq.send(:+, 1) + iseq.send(YARV.calldata(:+, 1)) end - iseq.send(:[]=, 2) + iseq.send(YARV.calldata(:[]=, 2)) end # $cursor += value @@ -118,10 +118,10 @@ def shift_by(iseq, value) if value < 0 iseq.putobject(-value) - iseq.send(:-, 1) + iseq.send(YARV.calldata(:-, 1)) else iseq.putobject(value) - iseq.send(:+, 1) + iseq.send(YARV.calldata(:+, 1)) end iseq.setglobal(:$cursor) @@ -133,10 +133,10 @@ def output_char(iseq) iseq.getglobal(:$tape) iseq.getglobal(:$cursor) - iseq.send(:[], 1) - iseq.send(:chr, 0) + iseq.send(YARV.calldata(:[], 1)) + iseq.send(YARV.calldata(:chr)) - iseq.send(:putc, 1) + iseq.send(YARV.calldata(:putc, 1)) end # $tape[$cursor] = $stdin.getc.ord @@ -145,10 +145,10 @@ def input_char(iseq) iseq.getglobal(:$cursor) iseq.getglobal(:$stdin) - iseq.send(:getc, 0) - iseq.send(:ord, 0) + iseq.send(YARV.calldata(:getc)) + iseq.send(YARV.calldata(:ord)) - iseq.send(:[]=, 2) + iseq.send(YARV.calldata(:[]=, 2)) end # unless $tape[$cursor] == 0 @@ -157,10 +157,10 @@ def loop_start(iseq) iseq.getglobal(:$tape) iseq.getglobal(:$cursor) - iseq.send(:[], 1) + iseq.send(YARV.calldata(:[], 1)) iseq.putobject(0) - iseq.send(:==, 1) + iseq.send(YARV.calldata(:==, 1)) branchunless = iseq.branchunless(-1) [start_label, branchunless] diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb new file mode 100644 index 00000000..45f2bb59 --- /dev/null +++ b/lib/syntax_tree/yarv/compiler.rb @@ -0,0 +1,2164 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This class is an experiment in transforming Syntax Tree nodes into their + # corresponding YARV instruction sequences. It attempts to mirror the + # behavior of RubyVM::InstructionSequence.compile. 
+ # + # You use this like any other visitor. First you parse code into a tree, + # then you visit it with this compiler. Visiting the root node of the tree + # will return a SyntaxTree::YARV::InstructionSequence object. + # With that object you can call #to_a on it, which will return a serialized + # form of the instruction sequence as an array. This array _should_ mirror + # the array given by RubyVM::InstructionSequence#to_a. + # + # As an example, here is how you would compile a single expression: + # + # program = SyntaxTree.parse("1 + 2") + # program.accept(SyntaxTree::YARV::Compiler.new).to_a + # + # [ + # "YARVInstructionSequence/SimpleDataFormat", + # 3, + # 1, + # 1, + # {:arg_size=>0, :local_size=>0, :stack_max=>2}, + # "", + # "", + # "", + # 1, + # :top, + # [], + # {}, + # [], + # [ + # [:putobject_INT2FIX_1_], + # [:putobject, 2], + # [:opt_plus, {:mid=>:+, :flag=>16, :orig_argc=>1}], + # [:leave] + # ] + # ] + # + # Note that this is the same output as calling: + # + # RubyVM::InstructionSequence.compile("1 + 2").to_a + # + class Compiler < BasicVisitor + # This visitor is responsible for converting Syntax Tree nodes into their + # corresponding Ruby structures. This is used to convert the operands of + # some instructions like putobject that push a Ruby object directly onto + # the stack. It is only used when the entire structure can be represented + # at compile-time, as opposed to constructed at run-time. + class RubyVisitor < BasicVisitor + # This error is raised whenever a node cannot be converted into a Ruby + # object at compile-time. + class CompilationError < StandardError + end + + # This will attempt to compile the given node. If it's possible, then + # it will return the compiled object. Otherwise it will return nil. 
+ def self.compile(node) + node.accept(new) + rescue CompilationError + end + + def visit_array(node) + visit_all(node.contents.parts) + end + + def visit_bare_assoc_hash(node) + node.assocs.to_h do |assoc| + # We can only convert regular key-value pairs. A double splat ** + # operator means it has to be converted at run-time. + raise CompilationError unless assoc.is_a?(Assoc) + [visit(assoc.key), visit(assoc.value)] + end + end + + def visit_float(node) + node.value.to_f + end + + alias visit_hash visit_bare_assoc_hash + + def visit_imaginary(node) + node.value.to_c + end + + def visit_int(node) + node.value.to_i + end + + def visit_label(node) + node.value.chomp(":").to_sym + end + + def visit_mrhs(node) + visit_all(node.parts) + end + + def visit_qsymbols(node) + node.elements.map { |element| visit(element).to_sym } + end + + def visit_qwords(node) + visit_all(node.elements) + end + + def visit_range(node) + left, right = [visit(node.left), visit(node.right)] + node.operator.value === ".." ? left..right : left...right + end + + def visit_rational(node) + node.value.to_r + end + + def visit_regexp_literal(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + Regexp.new(node.parts.first.value, visit_regexp_literal_flags(node)) + else + # Any interpolation of expressions or variables will result in the + # regular expression being constructed at run-time. + raise CompilationError + end + end + + # This isn't actually a visit method, though maybe it should be. It is + # responsible for converting the set of string options on a regular + # expression into its equivalent integer. 
+ def visit_regexp_literal_flags(node) + node + .options + .chars + .inject(0) do |accum, option| + accum | + case option + when "i" + Regexp::IGNORECASE + when "x" + Regexp::EXTENDED + when "m" + Regexp::MULTILINE + else + raise "Unknown regexp option: #{option}" + end + end + end + + def visit_symbol_literal(node) + node.value.value.to_sym + end + + def visit_symbols(node) + node.elements.map { |element| visit(element).to_sym } + end + + def visit_tstring_content(node) + node.value + end + + def visit_var_ref(node) + raise CompilationError unless node.value.is_a?(Kw) + + case node.value.value + when "nil" + nil + when "true" + true + when "false" + false + else + raise CompilationError + end + end + + def visit_word(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + node.parts.first.value + else + # Any interpolation of expressions or variables will result in the + # string being constructed at run-time. + raise CompilationError + end + end + + def visit_words(node) + visit_all(node.elements) + end + + def visit_unsupported(_node) + raise CompilationError + end + + # Please forgive the metaprogramming here. This is used to create visit + # methods for every node that we did not explicitly handle. By default + # each of these methods will raise a CompilationError. + handled = instance_methods(false) + (Visitor.instance_methods(false) - handled).each do |method| + alias_method method, :visit_unsupported + end + end + + # These options mirror the compilation options that we currently support + # that can be also passed to RubyVM::InstructionSequence.compile. + attr_reader :frozen_string_literal, + :operands_unification, + :specialized_instruction + + # The current instruction sequence that is being compiled. + attr_reader :iseq + + # A boolean to track if we're currently compiling the last statement + # within a set of statements. This information is necessary to determine + # if we need to return the value of the last statement. 
+ attr_reader :last_statement + + def initialize( + frozen_string_literal: false, + operands_unification: true, + specialized_instruction: true + ) + @frozen_string_literal = frozen_string_literal + @operands_unification = operands_unification + @specialized_instruction = specialized_instruction + + @iseq = nil + @last_statement = false + end + + def visit_BEGIN(node) + visit(node.statements) + end + + def visit_CHAR(node) + if frozen_string_literal + iseq.putobject(node.value[1..]) + else + iseq.putstring(node.value[1..]) + end + end + + def visit_END(node) + once_iseq = + with_child_iseq(iseq.block_child_iseq(node.location)) do + postexe_iseq = + with_child_iseq(iseq.block_child_iseq(node.location)) do + iseq.event(:RUBY_EVENT_B_CALL) + + *statements, last_statement = node.statements.body + visit_all(statements) + with_last_statement { visit(last_statement) } + + iseq.event(:RUBY_EVENT_B_RETURN) + iseq.leave + end + + iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) + iseq.send( + YARV.calldata(:"core#set_postexe", 0, CallData::CALL_FCALL), + postexe_iseq + ) + iseq.leave + end + + iseq.once(once_iseq, iseq.inline_storage) + iseq.pop + end + + def visit_alias(node) + iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) + iseq.putspecialobject(PutSpecialObject::OBJECT_CBASE) + visit(node.left) + visit(node.right) + iseq.send(YARV.calldata(:"core#set_method_alias", 3)) + end + + def visit_aref(node) + calldata = YARV.calldata(:[], 1) + visit(node.collection) + + if !frozen_string_literal && specialized_instruction && + (node.index.parts.length == 1) + arg = node.index.parts.first + + if arg.is_a?(StringLiteral) && (arg.parts.length == 1) + string_part = arg.parts.first + + if string_part.is_a?(TStringContent) + iseq.opt_aref_with(string_part.value, calldata) + return + end + end + end + + visit(node.index) + iseq.send(calldata) + end + + def visit_arg_block(node) + visit(node.value) + end + + def visit_arg_paren(node) + visit(node.arguments) + end + + def 
visit_arg_star(node) + visit(node.value) + iseq.splatarray(false) + end + + def visit_args(node) + visit_all(node.parts) + end + + def visit_array(node) + if (compiled = RubyVisitor.compile(node)) + iseq.duparray(compiled) + elsif node.contents && node.contents.parts.length == 1 && + node.contents.parts.first.is_a?(BareAssocHash) && + node.contents.parts.first.assocs.length == 1 && + node.contents.parts.first.assocs.first.is_a?(AssocSplat) + iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) + iseq.newhash(0) + visit(node.contents.parts.first) + iseq.send(YARV.calldata(:"core#hash_merge_kwd", 2)) + iseq.newarraykwsplat(1) + else + length = 0 + + node.contents.parts.each do |part| + if part.is_a?(ArgStar) + if length > 0 + iseq.newarray(length) + length = 0 + end + + visit(part.value) + iseq.concatarray + else + visit(part) + length += 1 + end + end + + iseq.newarray(length) if length > 0 + iseq.concatarray if length > 0 && length != node.contents.parts.length + end + end + + def visit_aryptn(node) + match_failures = [] + jumps_to_exit = [] + + # If there's a constant, then check if we match against that constant or + # not first. Branch to failure if we don't. + if node.constant + iseq.dup + visit(node.constant) + iseq.checkmatch(CheckMatch::TYPE_CASE) + match_failures << iseq.branchunless(-1) + end + + # First, check if the #deconstruct cache is nil. If it is, we're going + # to call #deconstruct on the object and cache the result. + iseq.topn(2) + branchnil = iseq.branchnil(-1) + + # Next, ensure that the cached value was cached correctly, otherwise + # fail the match. + iseq.topn(2) + match_failures << iseq.branchunless(-1) + + # Since we have a valid cached value, we can skip past the part where we + # call #deconstruct on the object. + iseq.pop + iseq.topn(1) + jump = iseq.jump(-1) + + # Check if the object responds to #deconstruct, fail the match + # otherwise. 
+ branchnil.patch!(iseq) + iseq.dup + iseq.putobject(:deconstruct) + iseq.send(YARV.calldata(:respond_to?, 1)) + iseq.setn(3) + match_failures << iseq.branchunless(-1) + + # Call #deconstruct and ensure that it's an array, raise an error + # otherwise. + iseq.send(YARV.calldata(:deconstruct)) + iseq.setn(2) + iseq.dup + iseq.checktype(CheckType::TYPE_ARRAY) + match_error = iseq.branchunless(-1) + + # Ensure that the deconstructed array has the correct size, fail the + # match otherwise. + jump.patch!(iseq) + iseq.dup + iseq.send(YARV.calldata(:length)) + iseq.putobject(node.requireds.length) + iseq.send(YARV.calldata(:==, 1)) + match_failures << iseq.branchunless(-1) + + # For each required element, check if the deconstructed array contains + # the element, otherwise jump out to the top-level match failure. + iseq.dup + node.requireds.each_with_index do |required, index| + iseq.putobject(index) + iseq.send(YARV.calldata(:[], 1)) + + case required + when VarField + lookup = visit(required) + iseq.setlocal(lookup.index, lookup.level) + else + visit(required) + iseq.checkmatch(CheckMatch::TYPE_CASE) + match_failures << iseq.branchunless(-1) + end + + if index < node.requireds.length - 1 + iseq.dup + else + iseq.pop + jumps_to_exit << iseq.jump(-1) + end + end + + # Set up the routine here to raise an error to indicate that the type of + # the deconstructed array was incorrect. + match_error.patch!(iseq) + iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) + iseq.putobject(TypeError) + iseq.putobject("deconstruct must return Array") + iseq.send(YARV.calldata(:"core#raise", 2)) + iseq.pop + + # Patch all of the match failures to jump here so that we pop a final + # value before returning to the parent node. 
+ match_failures.each { |match_failure| match_failure.patch!(iseq) } + iseq.pop + jumps_to_exit + end + + def visit_assign(node) + case node.target + when ARefField + calldata = YARV.calldata(:[]=, 2) + + if !frozen_string_literal && specialized_instruction && + (node.target.index.parts.length == 1) + arg = node.target.index.parts.first + + if arg.is_a?(StringLiteral) && (arg.parts.length == 1) + string_part = arg.parts.first + + if string_part.is_a?(TStringContent) + visit(node.target.collection) + visit(node.value) + iseq.swap + iseq.topn(1) + iseq.opt_aset_with(string_part.value, calldata) + iseq.pop + return + end + end + end + + iseq.putnil + visit(node.target.collection) + visit(node.target.index) + visit(node.value) + iseq.setn(3) + iseq.send(calldata) + iseq.pop + when ConstPathField + names = constant_names(node.target) + name = names.pop + + if RUBY_VERSION >= "3.2" + iseq.opt_getconstant_path(names) + visit(node.value) + iseq.swap + iseq.topn(1) + iseq.swap + iseq.setconstant(name) + else + visit(node.value) + iseq.dup if last_statement? + iseq.opt_getconstant_path(names) + iseq.setconstant(name) + end + when Field + iseq.putnil + visit(node.target) + visit(node.value) + iseq.setn(2) + iseq.send(YARV.calldata(:"#{node.target.name.value}=", 1)) + iseq.pop + when TopConstField + name = node.target.constant.value.to_sym + + if RUBY_VERSION >= "3.2" + iseq.putobject(Object) + visit(node.value) + iseq.swap + iseq.topn(1) + iseq.swap + iseq.setconstant(name) + else + visit(node.value) + iseq.dup if last_statement? + iseq.putobject(Object) + iseq.setconstant(name) + end + when VarField + visit(node.value) + iseq.dup if last_statement? 
+ + case node.target.value + when Const + iseq.putspecialobject(PutSpecialObject::OBJECT_CONST_BASE) + iseq.setconstant(node.target.value.value.to_sym) + when CVar + iseq.setclassvariable(node.target.value.value.to_sym) + when GVar + iseq.setglobal(node.target.value.value.to_sym) + when Ident + lookup = visit(node.target) + + if lookup.local.is_a?(LocalTable::BlockLocal) + iseq.setblockparam(lookup.index, lookup.level) + else + iseq.setlocal(lookup.index, lookup.level) + end + when IVar + iseq.setinstancevariable(node.target.value.value.to_sym) + end + end + end + + def visit_assoc(node) + visit(node.key) + visit(node.value) + end + + def visit_assoc_splat(node) + visit(node.value) + end + + def visit_backref(node) + iseq.getspecial(GetSpecial::SVAR_BACKREF, node.value[1..].to_i << 1) + end + + def visit_bare_assoc_hash(node) + if (compiled = RubyVisitor.compile(node)) + iseq.duphash(compiled) + else + visit_all(node.assocs) + end + end + + def visit_binary(node) + case node.operator + when :"&&" + visit(node.left) + iseq.dup + + branchunless = iseq.branchunless(-1) + iseq.pop + + visit(node.right) + branchunless.patch!(iseq) + when :"||" + visit(node.left) + iseq.dup + + branchif = iseq.branchif(-1) + iseq.pop + + visit(node.right) + branchif.patch!(iseq) + else + visit(node.left) + visit(node.right) + iseq.send(YARV.calldata(node.operator, 1)) + end + end + + def visit_block(node) + with_child_iseq(iseq.block_child_iseq(node.location)) do + iseq.event(:RUBY_EVENT_B_CALL) + visit(node.block_var) + visit(node.bodystmt) + iseq.event(:RUBY_EVENT_B_RETURN) + iseq.leave + end + end + + def visit_block_var(node) + params = node.params + + if params.requireds.length == 1 && params.optionals.empty? && + !params.rest && params.posts.empty? && params.keywords.empty? 
&& + !params.keyword_rest && !params.block + iseq.argument_options[:ambiguous_param0] = true + end + + visit(node.params) + + node.locals.each { |local| iseq.local_table.plain(local.value.to_sym) } + end + + def visit_blockarg(node) + iseq.argument_options[:block_start] = iseq.argument_size + iseq.local_table.block(node.name.value.to_sym) + iseq.argument_size += 1 + end + + def visit_bodystmt(node) + visit(node.statements) + end + + def visit_call(node) + if node.is_a?(CallNode) + return( + visit_call( + CommandCall.new( + receiver: node.receiver, + operator: node.operator, + message: node.message, + arguments: node.arguments, + block: nil, + location: node.location + ) + ) + ) + end + + arg_parts = argument_parts(node.arguments) + argc = arg_parts.length + + # First we're going to check if we're calling a method on an array + # literal without any arguments. In that case there are some + # specializations we might be able to perform. + if argc == 0 && (node.message.is_a?(Ident) || node.message.is_a?(Op)) + case node.receiver + when ArrayLiteral + parts = node.receiver.contents&.parts || [] + + if parts.none? { |part| part.is_a?(ArgStar) } && + RubyVisitor.compile(node.receiver).nil? + case node.message.value + when "max" + visit(node.receiver.contents) + iseq.opt_newarray_max(parts.length) + return + when "min" + visit(node.receiver.contents) + iseq.opt_newarray_min(parts.length) + return + end + end + when StringLiteral + if RubyVisitor.compile(node.receiver).nil? 
+ case node.message.value + when "-@" + iseq.opt_str_uminus(node.receiver.parts.first.value) + return + when "freeze" + iseq.opt_str_freeze(node.receiver.parts.first.value) + return + end + end + end + end + + if node.receiver + if node.receiver.is_a?(VarRef) + lookup = iseq.local_variable(node.receiver.value.value.to_sym) + + if lookup.local.is_a?(LocalTable::BlockLocal) + iseq.getblockparamproxy(lookup.index, lookup.level) + else + visit(node.receiver) + end + else + visit(node.receiver) + end + else + iseq.putself + end + + branchnil = + if node.operator&.value == "&." + iseq.dup + iseq.branchnil(-1) + end + + flag = 0 + + arg_parts.each do |arg_part| + case arg_part + when ArgBlock + argc -= 1 + flag |= CallData::CALL_ARGS_BLOCKARG + visit(arg_part) + when ArgStar + flag |= CallData::CALL_ARGS_SPLAT + visit(arg_part) + when ArgsForward + flag |= CallData::CALL_ARGS_SPLAT + flag |= CallData::CALL_ARGS_BLOCKARG + + lookup = iseq.local_table.find(:*) + iseq.getlocal(lookup.index, lookup.level) + iseq.splatarray(arg_parts.length != 1) + + lookup = iseq.local_table.find(:&) + iseq.getblockparamproxy(lookup.index, lookup.level) + when BareAssocHash + flag |= CallData::CALL_KW_SPLAT + visit(arg_part) + else + visit(arg_part) + end + end + + block_iseq = visit(node.block) if node.block + flag |= CallData::CALL_ARGS_SIMPLE if block_iseq.nil? && flag == 0 + flag |= CallData::CALL_FCALL if node.receiver.nil? 
+ + iseq.send( + YARV.calldata(node.message.value.to_sym, argc, flag), + block_iseq + ) + branchnil.patch!(iseq) if branchnil + end + + def visit_case(node) + visit(node.value) if node.value + + clauses = [] + else_clause = nil + current = node.consequent + + while current + clauses << current + + if (current = current.consequent).is_a?(Else) + else_clause = current + break + end + end + + branches = + clauses.map do |clause| + visit(clause.arguments) + iseq.topn(1) + iseq.send( + YARV.calldata( + :===, + 1, + CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE + ) + ) + [clause, iseq.branchif(:label_00)] + end + + iseq.pop + else_clause ? visit(else_clause) : iseq.putnil + iseq.leave + + branches.each_with_index do |(clause, branchif), index| + iseq.leave if index != 0 + branchif.patch!(iseq) + iseq.pop + visit(clause) + end + end + + def visit_class(node) + name = node.constant.constant.value.to_sym + class_iseq = + with_child_iseq(iseq.class_child_iseq(name, node.location)) do + iseq.event(:RUBY_EVENT_CLASS) + visit(node.bodystmt) + iseq.event(:RUBY_EVENT_END) + iseq.leave + end + + flags = DefineClass::TYPE_CLASS + + case node.constant + when ConstPathRef + flags |= DefineClass::FLAG_SCOPED + visit(node.constant.parent) + when ConstRef + iseq.putspecialobject(PutSpecialObject::OBJECT_CONST_BASE) + when TopConstRef + flags |= DefineClass::FLAG_SCOPED + iseq.putobject(Object) + end + + if node.superclass + flags |= DefineClass::FLAG_HAS_SUPERCLASS + visit(node.superclass) + else + iseq.putnil + end + + iseq.defineclass(name, class_iseq, flags) + end + + def visit_command(node) + visit_call( + CommandCall.new( + receiver: nil, + operator: nil, + message: node.message, + arguments: node.arguments, + block: node.block, + location: node.location + ) + ) + end + + def visit_command_call(node) + visit_call( + CommandCall.new( + receiver: node.receiver, + operator: node.operator, + message: node.message, + arguments: node.arguments, + block: node.block, + location: 
node.location + ) + ) + end + + def visit_const_path_field(node) + visit(node.parent) + end + + def visit_const_path_ref(node) + names = constant_names(node) + iseq.opt_getconstant_path(names) + end + + def visit_def(node) + name = node.name.value.to_sym + method_iseq = iseq.method_child_iseq(name.to_s, node.location) + + with_child_iseq(method_iseq) do + visit(node.params) if node.params + iseq.event(:RUBY_EVENT_CALL) + visit(node.bodystmt) + iseq.event(:RUBY_EVENT_RETURN) + iseq.leave + end + + if node.target + visit(node.target) + iseq.definesmethod(name, method_iseq) + else + iseq.definemethod(name, method_iseq) + end + + iseq.putobject(name) + end + + def visit_defined(node) + case node.value + when Assign + # If we're assigning to a local variable, then we need to make sure + # that we put it into the local table. + if node.value.target.is_a?(VarField) && + node.value.target.value.is_a?(Ident) + iseq.local_table.plain(node.value.target.value.value.to_sym) + end + + iseq.putobject("assignment") + when VarRef + value = node.value.value + name = value.value.to_sym + + case value + when Const + iseq.putnil + iseq.defined(Defined::TYPE_CONST, name, "constant") + when CVar + iseq.putnil + iseq.defined(Defined::TYPE_CVAR, name, "class variable") + when GVar + iseq.putnil + iseq.defined(Defined::TYPE_GVAR, name, "global-variable") + when Ident + iseq.putobject("local-variable") + when IVar + iseq.putnil + iseq.defined(Defined::TYPE_IVAR, name, "instance-variable") + when Kw + case name + when :false + iseq.putobject("false") + when :nil + iseq.putobject("nil") + when :self + iseq.putobject("self") + when :true + iseq.putobject("true") + end + end + when VCall + iseq.putself + + name = node.value.value.value.to_sym + iseq.defined(Defined::TYPE_FUNC, name, "method") + when YieldNode + iseq.putnil + iseq.defined(Defined::TYPE_YIELD, false, "yield") + when ZSuper + iseq.putnil + iseq.defined(Defined::TYPE_ZSUPER, false, "super") + else + iseq.putobject("expression") + 
end + end + + def visit_dyna_symbol(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + iseq.putobject(node.parts.first.value.to_sym) + end + end + + def visit_else(node) + visit(node.statements) + iseq.pop unless last_statement? + end + + def visit_elsif(node) + visit_if( + IfNode.new( + predicate: node.predicate, + statements: node.statements, + consequent: node.consequent, + location: node.location + ) + ) + end + + def visit_field(node) + visit(node.parent) + end + + def visit_float(node) + iseq.putobject(node.accept(RubyVisitor.new)) + end + + def visit_for(node) + visit(node.collection) + + name = node.index.value.value.to_sym + iseq.local_table.plain(name) + + block_iseq = + with_child_iseq(iseq.block_child_iseq(node.statements.location)) do + iseq.argument_options[:lead_num] ||= 0 + iseq.argument_options[:lead_num] += 1 + iseq.argument_options[:ambiguous_param0] = true + + iseq.argument_size += 1 + iseq.local_table.plain(2) + + iseq.getlocal(0, 0) + + local_variable = iseq.local_variable(name) + iseq.setlocal(local_variable.index, local_variable.level) + + iseq.event(:RUBY_EVENT_B_CALL) + iseq.nop + + visit(node.statements) + iseq.event(:RUBY_EVENT_B_RETURN) + iseq.leave + end + + iseq.send(YARV.calldata(:each, 0, 0), block_iseq) + end + + def visit_hash(node) + if (compiled = RubyVisitor.compile(node)) + iseq.duphash(compiled) + else + visit_all(node.assocs) + iseq.newhash(node.assocs.length * 2) + end + end + + def visit_heredoc(node) + if node.beginning.value.end_with?("`") + visit_xstring_literal(node) + elsif node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + visit(node.parts.first) + else + length = visit_string_parts(node) + iseq.concatstrings(length) + end + end + + def visit_if(node) + if node.predicate.is_a?(RangeNode) + iseq.getspecial(GetSpecial::SVAR_FLIPFLOP_START, 0) + branchif = iseq.branchif(-1) + + visit(node.predicate.left) + branchunless_true = iseq.branchunless(-1) + + iseq.putobject(true) + 
iseq.setspecial(GetSpecial::SVAR_FLIPFLOP_START) + branchif.patch!(iseq) + + visit(node.predicate.right) + branchunless_false = iseq.branchunless(-1) + + iseq.putobject(false) + iseq.setspecial(GetSpecial::SVAR_FLIPFLOP_START) + branchunless_false.patch!(iseq) + + visit(node.statements) + iseq.leave + branchunless_true.patch!(iseq) + iseq.putnil + else + visit(node.predicate) + branchunless = iseq.branchunless(-1) + visit(node.statements) + + if last_statement? + iseq.leave + branchunless.patch!(iseq) + + node.consequent ? visit(node.consequent) : iseq.putnil + else + iseq.pop + + if node.consequent + jump = iseq.jump(-1) + branchunless.patch!(iseq) + visit(node.consequent) + jump.patch!(iseq) + else + branchunless.patch!(iseq) + end + end + end + end + + def visit_if_op(node) + visit_if( + IfNode.new( + predicate: node.predicate, + statements: node.truthy, + consequent: + Else.new( + keyword: Kw.new(value: "else", location: Location.default), + statements: node.falsy, + location: Location.default + ), + location: Location.default + ) + ) + end + + def visit_imaginary(node) + iseq.putobject(node.accept(RubyVisitor.new)) + end + + def visit_int(node) + iseq.putobject(node.accept(RubyVisitor.new)) + end + + def visit_kwrest_param(node) + iseq.argument_options[:kwrest] = iseq.argument_size + iseq.argument_size += 1 + iseq.local_table.plain(node.name.value.to_sym) + end + + def visit_label(node) + iseq.putobject(node.accept(RubyVisitor.new)) + end + + def visit_lambda(node) + lambda_iseq = + with_child_iseq(iseq.block_child_iseq(node.location)) do + iseq.event(:RUBY_EVENT_B_CALL) + visit(node.params) + visit(node.statements) + iseq.event(:RUBY_EVENT_B_RETURN) + iseq.leave + end + + iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) + iseq.send(YARV.calldata(:lambda, 0, CallData::CALL_FCALL), lambda_iseq) + end + + def visit_lambda_var(node) + visit_block_var(node) + end + + def visit_massign(node) + visit(node.value) + iseq.dup + visit(node.target) + end + + def 
visit_method_add_block(node) + visit_call( + CommandCall.new( + receiver: node.call.receiver, + operator: node.call.operator, + message: node.call.message, + arguments: node.call.arguments, + block: node.block, + location: node.location + ) + ) + end + + def visit_mlhs(node) + lookups = [] + node.parts.each do |part| + case part + when VarField + lookups << visit(part) + end + end + + iseq.expandarray(lookups.length, 0) + lookups.each { |lookup| iseq.setlocal(lookup.index, lookup.level) } + end + + def visit_module(node) + name = node.constant.constant.value.to_sym + module_iseq = + with_child_iseq(iseq.module_child_iseq(name, node.location)) do + iseq.event(:RUBY_EVENT_CLASS) + visit(node.bodystmt) + iseq.event(:RUBY_EVENT_END) + iseq.leave + end + + flags = DefineClass::TYPE_MODULE + + case node.constant + when ConstPathRef + flags |= DefineClass::FLAG_SCOPED + visit(node.constant.parent) + when ConstRef + iseq.putspecialobject(PutSpecialObject::OBJECT_CONST_BASE) + when TopConstRef + flags |= DefineClass::FLAG_SCOPED + iseq.putobject(Object) + end + + iseq.putnil + iseq.defineclass(name, module_iseq, flags) + end + + def visit_mrhs(node) + if (compiled = RubyVisitor.compile(node)) + iseq.duparray(compiled) + else + visit_all(node.parts) + iseq.newarray(node.parts.length) + end + end + + def visit_not(node) + visit(node.statement) + iseq.send(YARV.calldata(:!)) + end + + def visit_opassign(node) + flag = CallData::CALL_ARGS_SIMPLE + if node.target.is_a?(ConstPathField) || node.target.is_a?(TopConstField) + flag |= CallData::CALL_FCALL + end + + case (operator = node.operator.value.chomp("=").to_sym) + when :"&&" + branchunless = nil + + with_opassign(node) do + iseq.dup + branchunless = iseq.branchunless(-1) + iseq.pop + visit(node.value) + end + + case node.target + when ARefField + iseq.leave + branchunless.patch!(iseq) + iseq.setn(3) + iseq.adjuststack(3) + when ConstPathField, TopConstField + branchunless.patch!(iseq) + iseq.swap + iseq.pop + else + 
branchunless.patch!(iseq) + end + when :"||" + if node.target.is_a?(ConstPathField) || + node.target.is_a?(TopConstField) + opassign_defined(node) + iseq.swap + iseq.pop + elsif node.target.is_a?(VarField) && + [Const, CVar, GVar].include?(node.target.value.class) + opassign_defined(node) + else + branchif = nil + + with_opassign(node) do + iseq.dup + branchif = iseq.branchif(-1) + iseq.pop + visit(node.value) + end + + if node.target.is_a?(ARefField) + iseq.leave + branchif.patch!(iseq) + iseq.setn(3) + iseq.adjuststack(3) + else + branchif.patch!(iseq) + end + end + else + with_opassign(node) do + visit(node.value) + iseq.send(YARV.calldata(operator, 1, flag)) + end + end + end + + def visit_params(node) + argument_options = iseq.argument_options + + if node.requireds.any? + argument_options[:lead_num] = 0 + + node.requireds.each do |required| + iseq.local_table.plain(required.value.to_sym) + iseq.argument_size += 1 + argument_options[:lead_num] += 1 + end + end + + node.optionals.each do |(optional, value)| + index = iseq.local_table.size + name = optional.value.to_sym + + iseq.local_table.plain(name) + iseq.argument_size += 1 + + argument_options[:opt] = [iseq.label] unless argument_options.key?( + :opt + ) + + visit(value) + iseq.setlocal(index, 0) + iseq.argument_options[:opt] << iseq.label + end + + visit(node.rest) if node.rest + + if node.posts.any? + argument_options[:post_start] = iseq.argument_size + argument_options[:post_num] = 0 + + node.posts.each do |post| + iseq.local_table.plain(post.value.to_sym) + iseq.argument_size += 1 + argument_options[:post_num] += 1 + end + end + + if node.keywords.any? + argument_options[:kwbits] = 0 + argument_options[:keyword] = [] + + keyword_bits_name = node.keyword_rest ? 
3 : 2 + iseq.argument_size += 1 + keyword_bits_index = iseq.local_table.locals.size + node.keywords.size + + node.keywords.each_with_index do |(keyword, value), keyword_index| + name = keyword.value.chomp(":").to_sym + index = iseq.local_table.size + + iseq.local_table.plain(name) + iseq.argument_size += 1 + argument_options[:kwbits] += 1 + + if value.nil? + argument_options[:keyword] << name + elsif (compiled = RubyVisitor.compile(value)) + argument_options[:keyword] << [name, compiled] + else + argument_options[:keyword] << [name] + iseq.checkkeyword(keyword_bits_index, keyword_index) + branchif = iseq.branchif(-1) + visit(value) + iseq.setlocal(index, 0) + branchif.patch!(iseq) + end + end + + iseq.local_table.plain(keyword_bits_name) + end + + if node.keyword_rest.is_a?(ArgsForward) + iseq.local_table.plain(:*) + iseq.local_table.plain(:&) + + iseq.argument_options[:rest_start] = iseq.argument_size + iseq.argument_options[:block_start] = iseq.argument_size + 1 + + iseq.argument_size += 2 + elsif node.keyword_rest + visit(node.keyword_rest) + end + + visit(node.block) if node.block + end + + def visit_paren(node) + visit(node.contents) + end + + def visit_program(node) + node.statements.body.each do |statement| + break unless statement.is_a?(Comment) + + if statement.value == "# frozen_string_literal: true" + @frozen_string_literal = true + end + end + + preexes = [] + statements = [] + + node.statements.body.each do |statement| + case statement + when Comment, EmbDoc, EndContent, VoidStmt + # ignore + when BEGINBlock + preexes << statement + else + statements << statement + end + end + + top_iseq = + InstructionSequence.new( + :top, + "", + nil, + node.location, + frozen_string_literal: frozen_string_literal, + operands_unification: operands_unification, + specialized_instruction: specialized_instruction + ) + + with_child_iseq(top_iseq) do + visit_all(preexes) + + if statements.empty? 
+ iseq.putnil + else + *statements, last_statement = statements + visit_all(statements) + with_last_statement { visit(last_statement) } + end + + iseq.leave + end + end + + def visit_qsymbols(node) + iseq.duparray(node.accept(RubyVisitor.new)) + end + + def visit_qwords(node) + if frozen_string_literal + iseq.duparray(node.accept(RubyVisitor.new)) + else + visit_all(node.elements) + iseq.newarray(node.elements.length) + end + end + + def visit_range(node) + if (compiled = RubyVisitor.compile(node)) + iseq.putobject(compiled) + else + visit(node.left) + visit(node.right) + iseq.newrange(node.operator.value == ".." ? 0 : 1) + end + end + + def visit_rassign(node) + iseq.putnil + + if node.operator.is_a?(Kw) + jumps = [] + + visit(node.value) + iseq.dup + + case node.pattern + when VarField + lookup = visit(node.pattern) + iseq.setlocal(lookup.index, lookup.level) + jumps << iseq.jump(-1) + else + jumps.concat(visit(node.pattern)) + end + + iseq.pop + iseq.pop + iseq.putobject(false) + iseq.leave + + jumps.each { |jump| jump.patch!(iseq) } + iseq.adjuststack(2) + iseq.putobject(true) + else + jumps_to_match = [] + + iseq.putnil + iseq.putobject(false) + iseq.putnil + iseq.putnil + visit(node.value) + iseq.dup + + # Visit the pattern. If it matches, + case node.pattern + when VarField + lookup = visit(node.pattern) + iseq.setlocal(lookup.index, lookup.level) + jumps_to_match << iseq.jump(-1) + else + jumps_to_match.concat(visit(node.pattern)) + end + + # First we're going to push the core onto the stack, then we'll check + # if the value to match is truthy. If it is, we'll jump down to raise + # NoMatchingPatternKeyError. Otherwise we'll raise + # NoMatchingPatternError. + iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) + iseq.topn(4) + branchif_no_key = iseq.branchif(-1) + + # Here we're going to raise NoMatchingPatternError. 
+ iseq.putobject(NoMatchingPatternError) + iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) + iseq.putobject("%p: %s") + iseq.topn(4) + iseq.topn(7) + iseq.send(YARV.calldata(:"core#sprintf", 3)) + iseq.send(YARV.calldata(:"core#raise", 2)) + jump_to_exit = iseq.jump(-1) + + # Here we're going to raise NoMatchingPatternKeyError. + branchif_no_key.patch!(iseq) + iseq.putobject(NoMatchingPatternKeyError) + iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) + iseq.putobject("%p: %s") + iseq.topn(4) + iseq.topn(7) + iseq.send(YARV.calldata(:"core#sprintf", 3)) + iseq.topn(7) + iseq.topn(9) + iseq.send( + YARV.calldata(:new, 1, CallData::CALL_KWARG, %i[matchee key]) + ) + iseq.send(YARV.calldata(:"core#raise", 1)) + + # This runs when the pattern fails to match. + jump_to_exit.patch!(iseq) + iseq.adjuststack(7) + iseq.putnil + iseq.leave + + # This runs when the pattern matches successfully. + jumps_to_match.each { |jump| jump.patch!(iseq) } + iseq.adjuststack(6) + iseq.putnil + end + end + + def visit_rational(node) + iseq.putobject(node.accept(RubyVisitor.new)) + end + + def visit_regexp_literal(node) + if (compiled = RubyVisitor.compile(node)) + iseq.putobject(compiled) + else + flags = RubyVisitor.new.visit_regexp_literal_flags(node) + length = visit_string_parts(node) + iseq.toregexp(flags, length) + end + end + + def visit_rest_param(node) + iseq.local_table.plain(node.name.value.to_sym) + iseq.argument_options[:rest_start] = iseq.argument_size + iseq.argument_size += 1 + end + + def visit_sclass(node) + visit(node.target) + iseq.putnil + + singleton_iseq = + with_child_iseq(iseq.singleton_class_child_iseq(node.location)) do + iseq.event(:RUBY_EVENT_CLASS) + visit(node.bodystmt) + iseq.event(:RUBY_EVENT_END) + iseq.leave + end + + iseq.defineclass( + :singletonclass, + singleton_iseq, + DefineClass::TYPE_SINGLETON_CLASS + ) + end + + def visit_statements(node) + statements = + node.body.select do |statement| + case statement + when Comment, EmbDoc, 
EndContent, VoidStmt + false + else + true + end + end + + statements.empty? ? iseq.putnil : visit_all(statements) + end + + def visit_string_concat(node) + value = node.left.parts.first.value + node.right.parts.first.value + + visit_string_literal( + StringLiteral.new( + parts: [TStringContent.new(value: value, location: node.location)], + quote: node.left.quote, + location: node.location + ) + ) + end + + def visit_string_embexpr(node) + visit(node.statements) + end + + def visit_string_literal(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + visit(node.parts.first) + else + length = visit_string_parts(node) + iseq.concatstrings(length) + end + end + + def visit_super(node) + iseq.putself + visit(node.arguments) + iseq.invokesuper( + YARV.calldata( + nil, + argument_parts(node.arguments).length, + CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE | + CallData::CALL_SUPER + ), + nil + ) + end + + def visit_symbol_literal(node) + iseq.putobject(node.accept(RubyVisitor.new)) + end + + def visit_symbols(node) + if (compiled = RubyVisitor.compile(node)) + iseq.duparray(compiled) + else + node.elements.each do |element| + if element.parts.length == 1 && + element.parts.first.is_a?(TStringContent) + iseq.putobject(element.parts.first.value.to_sym) + else + length = visit_string_parts(element) + iseq.concatstrings(length) + iseq.intern + end + end + + iseq.newarray(node.elements.length) + end + end + + def visit_top_const_ref(node) + iseq.opt_getconstant_path(constant_names(node)) + end + + def visit_tstring_content(node) + if frozen_string_literal + iseq.putobject(node.accept(RubyVisitor.new)) + else + iseq.putstring(node.accept(RubyVisitor.new)) + end + end + + def visit_unary(node) + method_id = + case node.operator + when "+", "-" + "#{node.operator}@" + else + node.operator + end + + visit_call( + CommandCall.new( + receiver: node.statement, + operator: nil, + message: Ident.new(value: method_id, location: Location.default), + arguments: 
nil, + block: nil, + location: Location.default + ) + ) + end + + def visit_undef(node) + node.symbols.each_with_index do |symbol, index| + iseq.pop if index != 0 + iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) + iseq.putspecialobject(PutSpecialObject::OBJECT_CBASE) + visit(symbol) + iseq.send(YARV.calldata(:"core#undef_method", 2)) + end + end + + def visit_unless(node) + visit(node.predicate) + branchunless = iseq.branchunless(-1) + node.consequent ? visit(node.consequent) : iseq.putnil + + if last_statement? + iseq.leave + branchunless.patch!(iseq) + + visit(node.statements) + else + iseq.pop + + if node.consequent + jump = iseq.jump(-1) + branchunless.patch!(iseq) + visit(node.consequent) + jump.patch!(iseq.label) + else + branchunless.patch!(iseq) + end + end + end + + def visit_until(node) + jumps = [] + + jumps << iseq.jump(-1) + iseq.putnil + iseq.pop + jumps << iseq.jump(-1) + + label = iseq.label + visit(node.statements) + iseq.pop + jumps.each { |jump| jump.patch!(iseq) } + + visit(node.predicate) + iseq.branchunless(label) + iseq.putnil if last_statement? 
+ end + + def visit_var_field(node) + case node.value + when CVar, IVar + name = node.value.value.to_sym + iseq.inline_storage_for(name) + when Ident + name = node.value.value.to_sym + + if (local_variable = iseq.local_variable(name)) + local_variable + else + iseq.local_table.plain(name) + iseq.local_variable(name) + end + end + end + + def visit_var_ref(node) + case node.value + when Const + iseq.opt_getconstant_path(constant_names(node)) + when CVar + name = node.value.value.to_sym + iseq.getclassvariable(name) + when GVar + iseq.getglobal(node.value.value.to_sym) + when Ident + lookup = iseq.local_variable(node.value.value.to_sym) + + case lookup.local + when LocalTable::BlockLocal + iseq.getblockparam(lookup.index, lookup.level) + when LocalTable::PlainLocal + iseq.getlocal(lookup.index, lookup.level) + end + when IVar + name = node.value.value.to_sym + iseq.getinstancevariable(name) + when Kw + case node.value.value + when "false" + iseq.putobject(false) + when "nil" + iseq.putnil + when "self" + iseq.putself + when "true" + iseq.putobject(true) + end + end + end + + def visit_vcall(node) + iseq.putself + iseq.send( + YARV.calldata( + node.value.value.to_sym, + 0, + CallData::CALL_FCALL | CallData::CALL_VCALL | + CallData::CALL_ARGS_SIMPLE + ) + ) + end + + def visit_when(node) + visit(node.statements) + end + + def visit_while(node) + jumps = [] + + jumps << iseq.jump(-1) + iseq.putnil + iseq.pop + jumps << iseq.jump(-1) + + label = iseq.label + visit(node.statements) + iseq.pop + jumps.each { |jump| jump.patch!(iseq) } + + visit(node.predicate) + iseq.branchif(label) + iseq.putnil if last_statement? 
+ end + + def visit_word(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + visit(node.parts.first) + else + length = visit_string_parts(node) + iseq.concatstrings(length) + end + end + + def visit_words(node) + if frozen_string_literal && (compiled = RubyVisitor.compile(node)) + iseq.duparray(compiled) + else + visit_all(node.elements) + iseq.newarray(node.elements.length) + end + end + + def visit_xstring_literal(node) + iseq.putself + length = visit_string_parts(node) + iseq.concatstrings(node.parts.length) if length > 1 + iseq.send( + YARV.calldata( + :`, + 1, + CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE + ) + ) + end + + def visit_yield(node) + parts = argument_parts(node.arguments) + visit_all(parts) + iseq.invokeblock(YARV.calldata(nil, parts.length)) + end + + def visit_zsuper(_node) + iseq.putself + iseq.invokesuper( + YARV.calldata( + nil, + 0, + CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE | + CallData::CALL_SUPER | CallData::CALL_ZSUPER + ), + nil + ) + end + + private + + # This is a helper that is used in places where arguments may be present + # or they may be wrapped in parentheses. It's meant to descend down the + # tree and return an array of argument nodes. + def argument_parts(node) + case node + when nil + [] + when Args + node.parts + when ArgParen + if node.arguments.is_a?(ArgsForward) + [node.arguments] + else + node.arguments.parts + end + when Paren + node.contents.parts + end + end + + # Constant names when they are being assigned or referenced come in as a + # tree, but it's more convenient to work with them as an array. This + # method converts them into that array. This is nice because it's the + # operand that goes to opt_getconstant_path in Ruby 3.2. 
+ def constant_names(node) + current = node + names = [] + + while current.is_a?(ConstPathField) || current.is_a?(ConstPathRef) + names.unshift(current.constant.value.to_sym) + current = current.parent + end + + case current + when VarField, VarRef + names.unshift(current.value.value.to_sym) + when TopConstRef + names.unshift(current.constant.value.to_sym) + names.unshift(:"") + end + + names + end + + # For the most part when an OpAssign (operator assignment) node with a ||= + # operator is being compiled it's a matter of reading the target, checking + # if the value should be evaluated, evaluating it if so, and then writing + # the result back to the target. + # + # However, in certain kinds of assignments (X, ::X, X::Y, @@x, and $x) we + # first check if the value is defined using the defined instruction. I + # don't know why it is necessary, and suspect that it isn't. + def opassign_defined(node) + case node.target + when ConstPathField + visit(node.target.parent) + name = node.target.constant.value.to_sym + + iseq.dup + iseq.defined(Defined::TYPE_CONST_FROM, name, true) + when TopConstField + name = node.target.constant.value.to_sym + + iseq.putobject(Object) + iseq.dup + iseq.defined(Defined::TYPE_CONST_FROM, name, true) + when VarField + name = node.target.value.value.to_sym + iseq.putnil + + case node.target.value + when Const + iseq.defined(Defined::TYPE_CONST, name, true) + when CVar + iseq.defined(Defined::TYPE_CVAR, name, true) + when GVar + iseq.defined(Defined::TYPE_GVAR, name, true) + end + end + + branchunless = iseq.branchunless(-1) + + case node.target + when ConstPathField, TopConstField + iseq.dup + iseq.putobject(true) + iseq.getconstant(name) + when VarField + case node.target.value + when Const + iseq.opt_getconstant_path(constant_names(node.target)) + when CVar + iseq.getclassvariable(name) + when GVar + iseq.getglobal(name) + end + end + + iseq.dup + branchif = iseq.branchif(-1) + iseq.pop + + branchunless.patch!(iseq) + visit(node.value) + 
+ case node.target + when ConstPathField, TopConstField + iseq.dupn(2) + iseq.swap + iseq.setconstant(name) + when VarField + iseq.dup + + case node.target.value + when Const + iseq.putspecialobject(PutSpecialObject::OBJECT_CONST_BASE) + iseq.setconstant(name) + when CVar + iseq.setclassvariable(name) + when GVar + iseq.setglobal(name) + end + end + + branchif.patch!(iseq) + end + + # Whenever a value is interpolated into a string-like structure, these + # three instructions are pushed. + def push_interpolate + iseq.dup + iseq.objtostring( + YARV.calldata( + :to_s, + 0, + CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE + ) + ) + iseq.anytostring + end + + # There are a lot of nodes in the AST that act as contains of parts of + # strings. This includes things like string literals, regular expressions, + # heredocs, etc. This method will visit all the parts of a string within + # those containers. + def visit_string_parts(node) + length = 0 + + unless node.parts.first.is_a?(TStringContent) + iseq.putobject("") + length += 1 + end + + node.parts.each do |part| + case part + when StringDVar + visit(part.variable) + push_interpolate + when StringEmbExpr + visit(part) + push_interpolate + when TStringContent + iseq.putobject(part.accept(RubyVisitor.new)) + end + + length += 1 + end + + length + end + + # The current instruction sequence that we're compiling is always stored + # on the compiler. When we descend into a node that has its own + # instruction sequence, this method can be called to temporarily set the + # new value of the instruction sequence, yield, and then set it back. + def with_child_iseq(child_iseq) + parent_iseq = iseq + + begin + @iseq = child_iseq + yield + child_iseq + ensure + @iseq = parent_iseq + end + end + + # When we're compiling the last statement of a set of statements within a + # scope, the instructions sometimes change from pops to leaves. These + # kinds of peephole optimizations can reduce the overall number of + # instructions. 
Therefore, we keep track of whether we're compiling the + # last statement of a scope and allow visit methods to query that + # information. + def with_last_statement + previous = @last_statement + @last_statement = true + + begin + yield + ensure + @last_statement = previous + end + end + + def last_statement? + @last_statement + end + + # OpAssign nodes can have a number of different kinds of nodes as their + # "target" (i.e., the left-hand side of the assignment). When compiling + # these nodes we typically need to first fetch the current value of the + # variable, then perform some kind of action, then store the result back + # into the variable. This method handles that by first fetching the value, + # then yielding to the block, then storing the result. + def with_opassign(node) + case node.target + when ARefField + iseq.putnil + visit(node.target.collection) + visit(node.target.index) + + iseq.dupn(2) + iseq.send(YARV.calldata(:[], 1)) + + yield + + iseq.setn(3) + iseq.send(YARV.calldata(:[]=, 2)) + iseq.pop + when ConstPathField + name = node.target.constant.value.to_sym + + visit(node.target.parent) + iseq.dup + iseq.putobject(true) + iseq.getconstant(name) + + yield + + if node.operator.value == "&&=" + iseq.dupn(2) + else + iseq.swap + iseq.topn(1) + end + + iseq.swap + iseq.setconstant(name) + when TopConstField + name = node.target.constant.value.to_sym + + iseq.putobject(Object) + iseq.dup + iseq.putobject(true) + iseq.getconstant(name) + + yield + + if node.operator.value == "&&=" + iseq.dupn(2) + else + iseq.swap + iseq.topn(1) + end + + iseq.swap + iseq.setconstant(name) + when VarField + case node.target.value + when Const + names = constant_names(node.target) + iseq.opt_getconstant_path(names) + + yield + + iseq.dup + iseq.putspecialobject(PutSpecialObject::OBJECT_CONST_BASE) + iseq.setconstant(names.last) + when CVar + name = node.target.value.value.to_sym + iseq.getclassvariable(name) + + yield + + iseq.dup + iseq.setclassvariable(name) + when 
GVar + name = node.target.value.value.to_sym + iseq.getglobal(name) + + yield + + iseq.dup + iseq.setglobal(name) + when Ident + local_variable = visit(node.target) + iseq.getlocal(local_variable.index, local_variable.level) + + yield + + iseq.dup + iseq.setlocal(local_variable.index, local_variable.level) + when IVar + name = node.target.value.value.to_sym + iseq.getinstancevariable(name) + + yield + + iseq.dup + iseq.setinstancevariable(name) + end + end + end + end + end +end diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb index 7a6e8893..d606e3cc 100644 --- a/lib/syntax_tree/yarv/disassembler.rb +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -87,166 +87,113 @@ def disassemble(iseq) when GetLocalWC0 local = iseq.local_table.locals[insn.index] clause << VarRef(Ident(local.name.to_s)) - when Array - case insn[0] - when :jump - clause << Assign(disasm_label.field, node_for(insn[1])) - clause << Next(Args([])) - when :leave - value = Args([clause.pop]) - clause << (iseq.type == :top ? Break(value) : ReturnNode(value)) - when :opt_and - left, right = clause.pop(2) - clause << Binary(left, :&, right) - when :opt_aref - collection, arg = clause.pop(2) - clause << ARef(collection, Args([arg])) - when :opt_aset - collection, arg, value = clause.pop(3) + when Jump + clause << Assign(disasm_label.field, node_for(insn.label)) + clause << Next(Args([])) + when Leave + value = Args([clause.pop]) + clause << (iseq.type == :top ? 
Break(value) : ReturnNode(value)) + when OptAnd, OptDiv, OptEq, OptGE, OptGT, OptLE, OptLT, OptLTLT, + OptMinus, OptMod, OptMult, OptOr, OptPlus + left, right = clause.pop(2) + clause << Binary(left, insn.calldata.method, right) + when OptAref + collection, arg = clause.pop(2) + clause << ARef(collection, Args([arg])) + when OptAset + collection, arg, value = clause.pop(3) - clause << if value.is_a?(Binary) && value.left.is_a?(ARef) && - collection === value.left.collection && - arg === value.left.index.parts[0] - OpAssign( - ARefField(collection, Args([arg])), - Op("#{value.operator}="), - value.right - ) - else - Assign(ARefField(collection, Args([arg])), value) - end - when :opt_div - left, right = clause.pop(2) - clause << Binary(left, :/, right) - when :opt_eq - left, right = clause.pop(2) - clause << Binary(left, :==, right) - when :opt_ge - left, right = clause.pop(2) - clause << Binary(left, :>=, right) - when :opt_gt - left, right = clause.pop(2) - clause << Binary(left, :>, right) - when :opt_le - left, right = clause.pop(2) - clause << Binary(left, :<=, right) - when :opt_lt - left, right = clause.pop(2) - clause << Binary(left, :<, right) - when :opt_ltlt - left, right = clause.pop(2) - clause << Binary(left, :<<, right) - when :opt_minus - left, right = clause.pop(2) - clause << Binary(left, :-, right) - when :opt_mod - left, right = clause.pop(2) - clause << Binary(left, :%, right) - when :opt_mult - left, right = clause.pop(2) - clause << Binary(left, :*, right) - when :opt_neq - left, right = clause.pop(2) - clause << Binary(left, :"!=", right) - when :opt_or - left, right = clause.pop(2) - clause << Binary(left, :|, right) - when :opt_plus - left, right = clause.pop(2) - clause << Binary(left, :+, right) - when :opt_send_without_block - if insn[1][:flag] & VM_CALL_FCALL > 0 - if insn[1][:orig_argc] == 0 - clause.pop - clause << CallNode(nil, nil, Ident(insn[1][:mid]), Args([])) - elsif insn[1][:orig_argc] == 1 && insn[1][:mid].end_with?("=") - 
_receiver, argument = clause.pop(2) - clause << Assign( - CallNode(nil, nil, Ident(insn[1][:mid][0..-2]), nil), - argument - ) - else - _receiver, *arguments = clause.pop(insn[1][:orig_argc] + 1) - clause << CallNode( - nil, - nil, - Ident(insn[1][:mid]), - ArgParen(Args(arguments)) - ) - end - else - if insn[1][:orig_argc] == 0 - clause << CallNode( - clause.pop, - Period("."), - Ident(insn[1][:mid]), - nil - ) - elsif insn[1][:orig_argc] == 1 && insn[1][:mid].end_with?("=") - receiver, argument = clause.pop(2) - clause << Assign( - CallNode( - receiver, - Period("."), - Ident(insn[1][:mid][0..-2]), - nil - ), - argument - ) - else - receiver, *arguments = clause.pop(insn[1][:orig_argc] + 1) - clause << CallNode( - receiver, - Period("."), - Ident(insn[1][:mid]), - ArgParen(Args(arguments)) - ) - end - end - when :putobject - case insn[1] - when Float - clause << FloatLiteral(insn[1].inspect) - when Integer - clause << Int(insn[1].inspect) - else - raise "Unknown object type: #{insn[1].class.name}" - end - when :putobject_INT2FIX_0_ - clause << Int("0") - when :putobject_INT2FIX_1_ - clause << Int("1") - when :putself - clause << VarRef(Kw("self")) - when :setglobal - target = GVar(insn[1].to_s) - value = clause.pop + clause << if value.is_a?(Binary) && value.left.is_a?(ARef) && + collection === value.left.collection && + arg === value.left.index.parts[0] + OpAssign( + ARefField(collection, Args([arg])), + Op("#{value.operator}="), + value.right + ) + else + Assign(ARefField(collection, Args([arg])), value) + end + when OptNEq + left, right = clause.pop(2) + clause << Binary(left, :"!=", right) + when OptSendWithoutBlock + method = insn.calldata.method.to_s + argc = insn.calldata.argc - clause << if value.is_a?(Binary) && VarRef(target) === value.left - OpAssign( - VarField(target), - Op("#{value.operator}="), - value.right + if insn.calldata.flag?(CallData::CALL_FCALL) + if argc == 0 + clause.pop + clause << CallNode(nil, nil, Ident(method), Args([])) + elsif 
argc == 1 && method.end_with?("=") + _receiver, argument = clause.pop(2) + clause << Assign( + CallNode(nil, nil, Ident(method[0..-2]), nil), + argument ) else - Assign(VarField(target), value) + _receiver, *arguments = clause.pop(argc + 1) + clause << CallNode( + nil, + nil, + Ident(method), + ArgParen(Args(arguments)) + ) end - when :setlocal_WC_0 - target = Ident(local_name(insn[1], 0)) - value = clause.pop - - clause << if value.is_a?(Binary) && VarRef(target) === value.left - OpAssign( - VarField(target), - Op("#{value.operator}="), - value.right + else + if argc == 0 + clause << CallNode(clause.pop, Period("."), Ident(method), nil) + elsif argc == 1 && method.end_with?("=") + receiver, argument = clause.pop(2) + clause << Assign( + CallNode(receiver, Period("."), Ident(method[0..-2]), nil), + argument ) else - Assign(VarField(target), value) + receiver, *arguments = clause.pop(argc + 1) + clause << CallNode( + receiver, + Period("."), + Ident(method), + ArgParen(Args(arguments)) + ) end + end + when PutObject + case insn.object + when Float + clause << FloatLiteral(insn.object.inspect) + when Integer + clause << Int(insn.object.inspect) + else + raise "Unknown object type: #{insn.object.class.name}" + end + when PutObjectInt2Fix0 + clause << Int("0") + when PutObjectInt2Fix1 + clause << Int("1") + when PutSelf + clause << VarRef(Kw("self")) + when SetGlobal + target = GVar(insn.name.to_s) + value = clause.pop + + clause << if value.is_a?(Binary) && VarRef(target) === value.left + OpAssign(VarField(target), Op("#{value.operator}="), value.right) + else + Assign(VarField(target), value) + end + when SetLocalWC0 + target = Ident(local_name(insn.index, 0)) + value = clause.pop + + clause << if value.is_a?(Binary) && VarRef(target) === value.left + OpAssign(VarField(target), Op("#{value.operator}="), value.right) else - raise "Unknown instruction #{insn[0]}" + Assign(VarField(target), value) end + else + raise "Unknown instruction #{insn[0]}" end end diff --git 
a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb new file mode 100644 index 00000000..c59d02c7 --- /dev/null +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -0,0 +1,671 @@ +# frozen_string_literal: true + +module SyntaxTree + # This module provides an object representation of the YARV bytecode. + module YARV + # This class is meant to mirror RubyVM::InstructionSequence. It contains a + # list of instructions along with the metadata pertaining to them. It also + # functions as a builder for the instruction sequence. + class InstructionSequence + MAGIC = "YARVInstructionSequence/SimpleDataFormat" + + # This provides a handle to the rb_iseq_load function, which allows you to + # pass a serialized iseq to Ruby and have it return a + # RubyVM::InstructionSequence object. + ISEQ_LOAD = + Fiddle::Function.new( + Fiddle::Handle::DEFAULT["rb_iseq_load"], + [Fiddle::TYPE_VOIDP] * 3, + Fiddle::TYPE_VOIDP + ) + + # This object is used to track the size of the stack at any given time. It + # is effectively a mini symbolic interpreter. It's necessary because when + # instruction sequences get serialized they include a :stack_max field on + # them. This field is used to determine how much stack space to allocate + # for the instruction sequence. + class Stack + attr_reader :current_size, :maximum_size + + def initialize + @current_size = 0 + @maximum_size = 0 + end + + def change_by(value) + @current_size += value + @maximum_size = @current_size if @current_size > @maximum_size + end + end + + # The type of the instruction sequence. + attr_reader :type + + # The name of the instruction sequence. + attr_reader :name + + # The parent instruction sequence, if there is one. + attr_reader :parent_iseq + + # The location of the root node of this instruction sequence. + attr_reader :location + + # This is the list of information about the arguments to this + # instruction sequence. 
+ attr_accessor :argument_size + attr_reader :argument_options + + # The list of instructions for this instruction sequence. + attr_reader :insns + + # The table of local variables. + attr_reader :local_table + + # The hash of names of instance and class variables pointing to the + # index of their associated inline storage. + attr_reader :inline_storages + + # The index of the next inline storage that will be created. + attr_reader :storage_index + + # An object that will track the current size of the stack and the + # maximum size of the stack for this instruction sequence. + attr_reader :stack + + # These are various compilation options provided. + attr_reader :frozen_string_literal, + :operands_unification, + :specialized_instruction + + def initialize( + type, + name, + parent_iseq, + location, + frozen_string_literal: false, + operands_unification: true, + specialized_instruction: true + ) + @type = type + @name = name + @parent_iseq = parent_iseq + @location = location + + @argument_size = 0 + @argument_options = {} + + @local_table = LocalTable.new + @inline_storages = {} + @insns = [] + @storage_index = 0 + @stack = Stack.new + + @frozen_string_literal = frozen_string_literal + @operands_unification = operands_unification + @specialized_instruction = specialized_instruction + end + + ########################################################################## + # Query methods + ########################################################################## + + def local_variable(name, level = 0) + if (lookup = local_table.find(name, level)) + lookup + elsif parent_iseq + parent_iseq.local_variable(name, level + 1) + end + end + + def inline_storage + storage = storage_index + @storage_index += 1 + storage + end + + def inline_storage_for(name) + inline_storages[name] = inline_storage unless inline_storages.key?(name) + + inline_storages[name] + end + + def length + insns.inject(0) do |sum, insn| + case insn + when Integer, Symbol + sum + else + sum + insn.length 
+ end + end + end + + def eval + compiled = to_a + + # Temporary hack until we get these working. + compiled[4][:node_id] = 11 + compiled[4][:node_ids] = [1, 0, 3, 2, 6, 7, 9, -1] + + Fiddle.dlunwrap(ISEQ_LOAD.call(Fiddle.dlwrap(compiled), 0, nil)).eval + end + + def to_a + versions = RUBY_VERSION.split(".").map(&:to_i) + + [ + MAGIC, + versions[0], + versions[1], + 1, + { + arg_size: argument_size, + local_size: local_table.size, + stack_max: stack.maximum_size + }, + name, + "", + "", + location.start_line, + type, + local_table.names, + argument_options, + [], + insns.map do |insn| + insn.is_a?(Integer) || insn.is_a?(Symbol) ? insn : insn.to_a(self) + end + ] + end + + ########################################################################## + # Child instruction sequence methods + ########################################################################## + + def child_iseq(type, name, location) + InstructionSequence.new( + type, + name, + self, + location, + frozen_string_literal: frozen_string_literal, + operands_unification: operands_unification, + specialized_instruction: specialized_instruction + ) + end + + def block_child_iseq(location) + current = self + current = current.parent_iseq while current.type == :block + child_iseq(:block, "block in #{current.name}", location) + end + + def class_child_iseq(name, location) + child_iseq(:class, "", location) + end + + def method_child_iseq(name, location) + child_iseq(:method, name, location) + end + + def module_child_iseq(name, location) + child_iseq(:class, "", location) + end + + def singleton_class_child_iseq(location) + child_iseq(:class, "singleton class", location) + end + + ########################################################################## + # Instruction push methods + ########################################################################## + + def push(insn) + insns << insn + + case insn + when Integer, Symbol, Array + insn + else + stack.change_by(-insn.pops + insn.pushes) + insn + end + 
end + + # This creates a new label at the current length of the instruction + # sequence. It is used as the operand for jump instructions. + def label + name = :"label_#{length}" + insns.last == name ? name : event(name) + end + + def event(name) + push(name) + end + + def adjuststack(number) + push(AdjustStack.new(number)) + end + + def anytostring + push(AnyToString.new) + end + + def branchif(label) + push(BranchIf.new(label)) + end + + def branchnil(label) + push(BranchNil.new(label)) + end + + def branchunless(label) + push(BranchUnless.new(label)) + end + + def checkkeyword(keyword_bits_index, keyword_index) + push(CheckKeyword.new(keyword_bits_index, keyword_index)) + end + + def checkmatch(type) + push(CheckMatch.new(type)) + end + + def checktype(type) + push(CheckType.new(type)) + end + + def concatarray + push(ConcatArray.new) + end + + def concatstrings(number) + push(ConcatStrings.new(number)) + end + + def defined(type, name, message) + push(Defined.new(type, name, message)) + end + + def defineclass(name, class_iseq, flags) + push(DefineClass.new(name, class_iseq, flags)) + end + + def definemethod(name, method_iseq) + push(DefineMethod.new(name, method_iseq)) + end + + def definesmethod(name, method_iseq) + push(DefineSMethod.new(name, method_iseq)) + end + + def dup + push(Dup.new) + end + + def duparray(object) + push(DupArray.new(object)) + end + + def duphash(object) + push(DupHash.new(object)) + end + + def dupn(number) + push(DupN.new(number)) + end + + def expandarray(length, flags) + push(ExpandArray.new(length, flags)) + end + + def getblockparam(index, level) + push(GetBlockParam.new(index, level)) + end + + def getblockparamproxy(index, level) + push(GetBlockParamProxy.new(index, level)) + end + + def getclassvariable(name) + if RUBY_VERSION < "3.0" + push(Legacy::GetClassVariable.new(name)) + else + push(GetClassVariable.new(name, inline_storage_for(name))) + end + end + + def getconstant(name) + push(GetConstant.new(name)) + end + + def 
getglobal(name) + push(GetGlobal.new(name)) + end + + def getinstancevariable(name) + if RUBY_VERSION < "3.2" + push(GetInstanceVariable.new(name, inline_storage_for(name))) + else + push(GetInstanceVariable.new(name, inline_storage)) + end + end + + def getlocal(index, level) + if operands_unification + # Specialize the getlocal instruction based on the level of the + # local variable. If it's 0 or 1, then there's a specialized + # instruction that will look at the current scope or the parent + # scope, respectively, and requires fewer operands. + case level + when 0 + push(GetLocalWC0.new(index)) + when 1 + push(GetLocalWC1.new(index)) + else + push(GetLocal.new(index, level)) + end + else + push(GetLocal.new(index, level)) + end + end + + def getspecial(key, type) + push(GetSpecial.new(key, type)) + end + + def intern + push(Intern.new) + end + + def invokeblock(calldata) + push(InvokeBlock.new(calldata)) + end + + def invokesuper(calldata, block_iseq) + push(InvokeSuper.new(calldata, block_iseq)) + end + + def jump(label) + push(Jump.new(label)) + end + + def leave + push(Leave.new) + end + + def newarray(number) + push(NewArray.new(number)) + end + + def newarraykwsplat(number) + push(NewArrayKwSplat.new(number)) + end + + def newhash(number) + push(NewHash.new(number)) + end + + def newrange(exclude_end) + push(NewRange.new(exclude_end)) + end + + def nop + push(Nop.new) + end + + def objtostring(calldata) + push(ObjToString.new(calldata)) + end + + def once(iseq, cache) + push(Once.new(iseq, cache)) + end + + def opt_aref_with(object, calldata) + push(OptArefWith.new(object, calldata)) + end + + def opt_aset_with(object, calldata) + push(OptAsetWith.new(object, calldata)) + end + + def opt_getconstant_path(names) + if RUBY_VERSION < "3.2" + cache = inline_storage + getinlinecache = opt_getinlinecache(-1, cache) + + if names[0] == :"" + names.shift + pop + putobject(Object) + end + + names.each_with_index do |name, index| + putobject(index == 0) + 
getconstant(name) + end + + opt_setinlinecache(cache) + getinlinecache.patch!(self) + else + push(OptGetConstantPath.new(names)) + end + end + + def opt_getinlinecache(label, cache) + push(Legacy::OptGetInlineCache.new(label, cache)) + end + + def opt_newarray_max(length) + if specialized_instruction + push(OptNewArrayMax.new(length)) + else + newarray(length) + send(YARV.calldata(:max)) + end + end + + def opt_newarray_min(length) + if specialized_instruction + push(OptNewArrayMin.new(length)) + else + newarray(length) + send(YARV.calldata(:min)) + end + end + + def opt_setinlinecache(cache) + push(Legacy::OptSetInlineCache.new(cache)) + end + + def opt_str_freeze(object) + if specialized_instruction + push(OptStrFreeze.new(object, YARV.calldata(:freeze))) + else + putstring(object) + send(YARV.calldata(:freeze)) + end + end + + def opt_str_uminus(object) + if specialized_instruction + push(OptStrUMinus.new(object, YARV.calldata(:-@))) + else + putstring(object) + send(YARV.calldata(:-@)) + end + end + + def pop + push(Pop.new) + end + + def putnil + push(PutNil.new) + end + + def putobject(object) + if operands_unification + # Specialize the putobject instruction based on the value of the + # object. If it's 0 or 1, then there's a specialized instruction + # that will push the object onto the stack and requires fewer + # operands. + if object.eql?(0) + push(PutObjectInt2Fix0.new) + elsif object.eql?(1) + push(PutObjectInt2Fix1.new) + else + push(PutObject.new(object)) + end + else + push(PutObject.new(object)) + end + end + + def putself + push(PutSelf.new) + end + + def putspecialobject(object) + push(PutSpecialObject.new(object)) + end + + def putstring(object) + push(PutString.new(object)) + end + + def send(calldata, block_iseq = nil) + if specialized_instruction && !block_iseq && + !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) + # Specialize the send instruction. 
If it doesn't have a block + # attached, then we will replace it with an opt_send_without_block + # and do further specializations based on the called method and the + # number of arguments. + case [calldata.method, calldata.argc] + when [:length, 0] + push(OptLength.new(calldata)) + when [:size, 0] + push(OptSize.new(calldata)) + when [:empty?, 0] + push(OptEmptyP.new(calldata)) + when [:nil?, 0] + push(OptNilP.new(calldata)) + when [:succ, 0] + push(OptSucc.new(calldata)) + when [:!, 0] + push(OptNot.new(calldata)) + when [:+, 1] + push(OptPlus.new(calldata)) + when [:-, 1] + push(OptMinus.new(calldata)) + when [:*, 1] + push(OptMult.new(calldata)) + when [:/, 1] + push(OptDiv.new(calldata)) + when [:%, 1] + push(OptMod.new(calldata)) + when [:==, 1] + push(OptEq.new(calldata)) + when [:!=, 1] + push(OptNEq.new(YARV.calldata(:==, 1), calldata)) + when [:=~, 1] + push(OptRegExpMatch2.new(calldata)) + when [:<, 1] + push(OptLT.new(calldata)) + when [:<=, 1] + push(OptLE.new(calldata)) + when [:>, 1] + push(OptGT.new(calldata)) + when [:>=, 1] + push(OptGE.new(calldata)) + when [:<<, 1] + push(OptLTLT.new(calldata)) + when [:[], 1] + push(OptAref.new(calldata)) + when [:&, 1] + push(OptAnd.new(calldata)) + when [:|, 1] + push(OptOr.new(calldata)) + when [:[]=, 2] + push(OptAset.new(calldata)) + else + push(OptSendWithoutBlock.new(calldata)) + end + else + push(Send.new(calldata, block_iseq)) + end + end + + def setblockparam(index, level) + push(SetBlockParam.new(index, level)) + end + + def setclassvariable(name) + if RUBY_VERSION < "3.0" + push(Legacy::SetClassVariable.new(name)) + else + push(SetClassVariable.new(name, inline_storage_for(name))) + end + end + + def setconstant(name) + push(SetConstant.new(name)) + end + + def setglobal(name) + push(SetGlobal.new(name)) + end + + def setinstancevariable(name) + if RUBY_VERSION < "3.2" + push(SetInstanceVariable.new(name, inline_storage_for(name))) + else + push(SetInstanceVariable.new(name, inline_storage)) + end 
+ end + + def setlocal(index, level) + if operands_unification + # Specialize the setlocal instruction based on the level of the + # local variable. If it's 0 or 1, then there's a specialized + # instruction that will write to the current scope or the parent + # scope, respectively, and requires fewer operands. + case level + when 0 + push(SetLocalWC0.new(index)) + when 1 + push(SetLocalWC1.new(index)) + else + push(SetLocal.new(index, level)) + end + else + push(SetLocal.new(index, level)) + end + end + + def setn(number) + push(SetN.new(number)) + end + + def setspecial(key) + push(SetSpecial.new(key)) + end + + def splatarray(flag) + push(SplatArray.new(flag)) + end + + def swap + push(Swap.new) + end + + def topn(number) + push(TopN.new(number)) + end + + def toregexp(options, length) + push(ToRegExp.new(options, length)) + end + end + end +end diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index e6853a87..5a23bbf0 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -2,6 +2,58 @@ module SyntaxTree module YARV + # This is an operand to various YARV instructions that represents the + # information about a specific call site. 
+ class CallData + CALL_ARGS_SPLAT = 1 << 0 + CALL_ARGS_BLOCKARG = 1 << 1 + CALL_FCALL = 1 << 2 + CALL_VCALL = 1 << 3 + CALL_ARGS_SIMPLE = 1 << 4 + CALL_BLOCKISEQ = 1 << 5 + CALL_KWARG = 1 << 6 + CALL_KW_SPLAT = 1 << 7 + CALL_TAILCALL = 1 << 8 + CALL_SUPER = 1 << 9 + CALL_ZSUPER = 1 << 10 + CALL_OPT_SEND = 1 << 11 + CALL_KW_SPLAT_MUT = 1 << 12 + + attr_reader :method, :argc, :flags, :kw_arg + + def initialize( + method, + argc = 0, + flags = CallData::CALL_ARGS_SIMPLE, + kw_arg = nil + ) + @method = method + @argc = argc + @flags = flags + @kw_arg = kw_arg + end + + def flag?(mask) + (flags & mask) > 0 + end + + def to_h + result = { mid: method, flag: flags, orig_argc: argc } + result[:kw_arg] = kw_arg if kw_arg + result + end + end + + # A convenience method for creating a CallData object. + def self.calldata( + method, + argc = 0, + flags = CallData::CALL_ARGS_SIMPLE, + kw_arg = nil + ) + CallData.new(method, argc, flags, kw_arg) + end + # ### Summary # # `adjuststack` accepts a single integer argument and removes that many @@ -260,6 +312,109 @@ def pushes end end + # ### Summary + # + # `checkmatch` checks if the current pattern matches the current value. It + # pops the target and the pattern off the stack and pushes a boolean onto + # the stack if it matches or not. + # + # ### Usage + # + # ~~~ruby + # foo in Foo + # ~~~ + # + class CheckMatch + TYPE_WHEN = 1 + TYPE_CASE = 2 + TYPE_RESCUE = 3 + + attr_reader :type + + def initialize(type) + @type = type + end + + def to_a(_iseq) + [:checkmatch, type] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `checktype` checks if the value on top of the stack is of a certain type. + # The type is the only argument. It pops the value off the stack and pushes + # a boolean onto the stack indicating whether or not the value is of the + # given type. 
+ # + # ### Usage + # + # ~~~ruby + # foo in [bar] + # ~~~ + # + class CheckType + TYPE_OBJECT = 0x01 + TYPE_CLASS = 0x02 + TYPE_MODULE = 0x03 + TYPE_FLOAT = 0x04 + TYPE_STRING = 0x05 + TYPE_REGEXP = 0x06 + TYPE_ARRAY = 0x07 + TYPE_HASH = 0x08 + TYPE_STRUCT = 0x09 + TYPE_BIGNUM = 0x0a + TYPE_FILE = 0x0b + TYPE_DATA = 0x0c + TYPE_MATCH = 0x0d + TYPE_COMPLEX = 0x0e + TYPE_RATIONAL = 0x0f + TYPE_NIL = 0x11 + TYPE_TRUE = 0x12 + TYPE_FALSE = 0x13 + TYPE_SYMBOL = 0x14 + TYPE_FIXNUM = 0x15 + TYPE_UNDEF = 0x16 + + attr_reader :type + + def initialize(type) + @type = type + end + + def to_a(_iseq) + [:checktype, type] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + # TODO: This is incorrect. The instruction only pushes a single value + # onto the stack. However, if this is set to 1, we no longer match the + # output of RubyVM::InstructionSequence. So leaving this here until we + # can investigate further. + 2 + end + end + # ### Summary # # `concatarray` concatenates the two Arrays on top of the stack. @@ -800,6 +955,42 @@ def pushes end end + # ### Summary + # + # `getconstant` performs a constant lookup and pushes the value of the + # constant onto the stack. It pops both the class it should look in and + # whether or not it should look globally as well. + # + # ### Usage + # + # ~~~ruby + # Constant + # ~~~ + # + class GetConstant + attr_reader :name + + def initialize(name) + @name = name + end + + def to_a(_iseq) + [:getconstant, name] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + # ### Summary # # `getglobal` pushes the value of a global variables onto the stack. @@ -991,84 +1182,2425 @@ def pushes end end - # This module contains the instructions that used to be a part of YARV but - # have been replaced or removed in more recent versions. - module Legacy - # ### Summary - # - # `getclassvariable` looks for a class variable in the current class and - # pushes its value onto the stack. 
- # - # This version of the `getclassvariable` instruction is no longer used - # since in Ruby 3.0 it gained an inline cache.` - # - # ### Usage - # - # ~~~ruby - # @@class_variable - # ~~~ - # - class GetClassVariable - attr_reader :name - - def initialize(name) - @name = name - end - - def to_a(_iseq) - [:getclassvariable, name] - end - - def length - 2 - end - - def pops - 0 - end - - def pushes - 1 - end - end - - # ### Summary - # - # `getconstant` performs a constant lookup and pushes the value of the - # constant onto the stack. It pops both the class it should look in and - # whether or not it should look globally as well. - # - # This instruction is no longer used since in Ruby 3.2 it was replaced by - # the consolidated `opt_getconstant_path` instruction. - # - # ### Usage - # - # ~~~ruby - # Constant - # ~~~ - # - class GetConstant - attr_reader :name - - def initialize(name) - @name = name - end - - def to_a(_iseq) - [:getconstant, name] - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end + # ### Summary + # + # `getspecial` pushes the value of a special local variable onto the stack. + # + # ### Usage + # + # ~~~ruby + # [true] + # ~~~ + # + class GetSpecial + SVAR_LASTLINE = 0 # $_ + SVAR_BACKREF = 1 # $~ + SVAR_FLIPFLOP_START = 2 # flipflop + + attr_reader :key, :type + + def initialize(key, type) + @key = key + @type = type + end + + def to_a(_iseq) + [:getspecial, key, type] + end + + def length + 3 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `intern` converts the top element of the stack to a symbol and pushes the + # symbol onto the stack. + # + # ### Usage + # + # ~~~ruby + # :"#{"foo"}" + # ~~~ + # + class Intern + def to_a(_iseq) + [:intern] + end + + def length + 1 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `invokeblock` invokes the block given to the current method. 
It pops the + # arguments for the block off the stack and pushes the result of running the + # block onto the stack. + # + # ### Usage + # + # ~~~ruby + # def foo + # yield + # end + # ~~~ + # + class InvokeBlock + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:invokeblock, calldata.to_h] + end + + def length + 2 + end + + def pops + calldata.argc + end + + def pushes + 1 + end + end + + # ### Summary + # + # `invokesuper` is similar to the `send` instruction, except that it calls + # the super method. It pops the receiver and arguments off the stack and + # pushes the return value onto the stack. + # + # ### Usage + # + # ~~~ruby + # def foo + # super + # end + # ~~~ + # + class InvokeSuper + attr_reader :calldata, :block_iseq + + def initialize(calldata, block_iseq) + @calldata = calldata + @block_iseq = block_iseq + end + + def to_a(_iseq) + [:invokesuper, calldata.to_h, block_iseq&.to_a] + end + + def length + 1 + end + + def pops + argb = (calldata.flag?(CallData::CALL_ARGS_BLOCKARG) ? 1 : 0) + argb + calldata.argc + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `jump` unconditionally jumps to the label given as its only argument. + # + # ### Usage + # + # ~~~ruby + # x = 0 + # if x == 0 + # puts "0" + # else + # puts "2" + # end + # ~~~ + # + class Jump + attr_reader :label + + def initialize(label) + @label = label + end + + def patch!(iseq) + @label = iseq.label + end + + def to_a(_iseq) + [:jump, label] + end + + def length + 2 + end + + def pops + 0 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `leave` exits the current frame. + # + # ### Usage + # + # ~~~ruby + # ;; + # ~~~ + # + class Leave + def to_a(_iseq) + [:leave] + end + + def length + 1 + end + + def pops + 1 + end + + def pushes + # TODO: This is wrong. It should be 1. But it's 0 for now because + # otherwise the stack size is incorrectly calculated. 
+ 0 + end + end + + # ### Summary + # + # `newarray` puts a new array initialized with `number` values from the + # stack. It pops `number` values off the stack and pushes the array onto the + # stack. + # + # ### Usage + # + # ~~~ruby + # ["string"] + # ~~~ + # + class NewArray + attr_reader :number + + def initialize(number) + @number = number + end + + def to_a(_iseq) + [:newarray, number] + end + + def length + 2 + end + + def pops + number + end + + def pushes + 1 + end + end + + # ### Summary + # + # `newarraykwsplat` is a specialized version of `newarray` that takes a ** + # splat argument. It pops `number` values off the stack and pushes the array + # onto the stack. + # + # ### Usage + # + # ~~~ruby + # ["string", **{ foo: "bar" }] + # ~~~ + # + class NewArrayKwSplat + attr_reader :number + + def initialize(number) + @number = number + end + + def to_a(_iseq) + [:newarraykwsplat, number] + end + + def length + 2 + end + + def pops + number + end + + def pushes + 1 + end + end + + # ### Summary + # + # `newhash` puts a new hash onto the stack, using `number` elements from the + # stack. `number` needs to be even. It pops `number` elements off the stack + # and pushes a hash onto the stack. + # + # ### Usage + # + # ~~~ruby + # def foo(key, value) + # { key => value } + # end + # ~~~ + # + class NewHash + attr_reader :number + + def initialize(number) + @number = number + end + + def to_a(_iseq) + [:newhash, number] + end + + def length + 2 + end + + def pops + number + end + + def pushes + 1 + end + end + + # ### Summary + # + # `newrange` creates a new range object from the top two values on the + # stack. It pops both of them off, and then pushes on the new range. It + # takes one argument which is 0 if the end is included or 1 if the end value + # is excluded. 
+ # + # ### Usage + # + # ~~~ruby + # x = 0 + # y = 1 + # p (x..y), (x...y) + # ~~~ + # + class NewRange + attr_reader :exclude_end + + def initialize(exclude_end) + @exclude_end = exclude_end + end + + def to_a(_iseq) + [:newrange, exclude_end] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `nop` is a no-operation instruction. It is used to pad the instruction + # sequence so there is a place for other instructions to jump to. + # + # ### Usage + # + # ~~~ruby + # raise rescue true + # ~~~ + # + class Nop + def to_a(_iseq) + [:nop] + end + + def length + 1 + end + + def pops + 0 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `objtostring` pops a value from the stack, calls `to_s` on that value and + # then pushes the result back to the stack. + # + # It has various fast paths for classes like String, Symbol, Module, Class, + # etc. For everything else it calls `to_s`. + # + # ### Usage + # + # ~~~ruby + # "#{5}" + # ~~~ + # + class ObjToString + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:objtostring, calldata.to_h] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `once` is an instruction that wraps an instruction sequence and ensures + # that is it only ever executed once for the lifetime of the program. It + # uses a cache to ensure that it is only executed once. It pushes the result + # of running the instruction sequence onto the stack. 
+ # + # ### Usage + # + # ~~~ruby + # END { puts "END" } + # ~~~ + # + class Once + attr_reader :iseq, :cache + + def initialize(iseq, cache) + @iseq = iseq + @cache = cache + end + + def to_a(_iseq) + [:once, iseq.to_a, cache] + end + + def length + 3 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_and` is a specialization of the `opt_send_without_block` instruction + # that occurs when the `&` operator is used. There is a fast path for if + # both operands are integers. It pops both the receiver and the argument off + # the stack and pushes on the result. + # + # ### Usage + # + # ~~~ruby + # 2 & 3 + # ~~~ + # + class OptAnd + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_and, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_aref` is a specialization of the `opt_send_without_block` instruction + # that occurs when the `[]` operator is used. There are fast paths if the + # receiver is an integer, array, or hash. + # + # ### Usage + # + # ~~~ruby + # 7[2] + # ~~~ + # + class OptAref + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_aref, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_aref_with` is a specialization of the `opt_aref` instruction that + # occurs when the `[]` operator is used with a string argument known at + # compile time. There are fast paths if the receiver is a hash. It pops the + # receiver off the stack and pushes on the result. 
+ # + # ### Usage + # + # ~~~ruby + # { 'test' => true }['test'] + # ~~~ + # + class OptArefWith + attr_reader :object, :calldata + + def initialize(object, calldata) + @object = object + @calldata = calldata + end + + def to_a(_iseq) + [:opt_aref_with, object, calldata.to_h] + end + + def length + 3 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_aset` is an instruction for setting the hash value by the key in + # the `recv[obj] = set` format. It is a specialization of the + # `opt_send_without_block` instruction. It pops the receiver, the key, and + # the value off the stack and pushes on the result. + # + # ### Usage + # + # ~~~ruby + # {}[:key] = value + # ~~~ + # + class OptAset + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_aset, calldata.to_h] + end + + def length + 2 + end + + def pops + 3 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_aset_with` is an instruction for setting the hash value by the known + # string key in the `recv[obj] = set` format. It pops the receiver and the + # value off the stack and pushes on the result. + # + # ### Usage + # + # ~~~ruby + # {}["key"] = value + # ~~~ + # + class OptAsetWith + attr_reader :object, :calldata + + def initialize(object, calldata) + @object = object + @calldata = calldata + end + + def to_a(_iseq) + [:opt_aset_with, object, calldata.to_h] + end + + def length + 3 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_div` is a specialization of the `opt_send_without_block` instruction + # that occurs when the `/` operator is used. There are fast paths for if + # both operands are integers, or if both operands are floats. It pops both + # the receiver and the argument off the stack and pushes on the result. 
+ # + # ### Usage + # + # ~~~ruby + # 2 / 3 + # ~~~ + # + class OptDiv + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_div, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_empty_p` is an optimization applied when the method `empty?` is + # called. It pops the receiver off the stack and pushes on the result of the + # method call. + # + # ### Usage + # + # ~~~ruby + # "".empty? + # ~~~ + # + class OptEmptyP + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_empty_p, calldata.to_h] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_eq` is a specialization of the `opt_send_without_block` instruction + # that occurs when the == operator is used. Fast paths exist when both + # operands are integers, floats, symbols or strings. It pops both the + # receiver and the argument off the stack and pushes on the result. + # + # ### Usage + # + # ~~~ruby + # 2 == 2 + # ~~~ + # + class OptEq + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_eq, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_ge` is a specialization of the `opt_send_without_block` instruction + # that occurs when the >= operator is used. Fast paths exist when both + # operands are integers or floats. It pops both the receiver and the + # argument off the stack and pushes on the result. 
+ # + # ### Usage + # + # ~~~ruby + # 4 >= 3 + # ~~~ + # + class OptGE + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_ge, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_getconstant_path` performs a constant lookup on a chain of constant + # names. It accepts as its argument an array of constant names, and pushes + # the value of the constant onto the stack. + # + # ### Usage + # + # ~~~ruby + # ::Object + # ~~~ + # + class OptGetConstantPath + attr_reader :names + + def initialize(names) + @names = names + end + + def to_a(_iseq) + [:opt_getconstant_path, names] + end + + def length + 2 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_gt` is a specialization of the `opt_send_without_block` instruction + # that occurs when the > operator is used. Fast paths exist when both + # operands are integers or floats. It pops both the receiver and the + # argument off the stack and pushes on the result. + # + # ### Usage + # + # ~~~ruby + # 4 > 3 + # ~~~ + # + class OptGT + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_gt, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_le` is a specialization of the `opt_send_without_block` instruction + # that occurs when the <= operator is used. Fast paths exist when both + # operands are integers or floats. It pops both the receiver and the + # argument off the stack and pushes on the result. 
+ # + # ### Usage + # + # ~~~ruby + # 3 <= 4 + # ~~~ + # + class OptLE + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_le, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_length` is a specialization of `opt_send_without_block`, when the + # `length` method is called. There are fast paths when the receiver is + # either a string, hash, or array. It pops the receiver off the stack and + # pushes on the result of the method call. + # + # ### Usage + # + # ~~~ruby + # "".length + # ~~~ + # + class OptLength + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_length, calldata.to_h] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_lt` is a specialization of the `opt_send_without_block` instruction + # that occurs when the < operator is used. Fast paths exist when both + # operands are integers or floats. It pops both the receiver and the + # argument off the stack and pushes on the result. + # + # ### Usage + # + # ~~~ruby + # 3 < 4 + # ~~~ + # + class OptLT + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_lt, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_ltlt` is a specialization of the `opt_send_without_block` instruction + # that occurs when the `<<` operator is used. Fast paths exists when the + # receiver is either a String or an Array. It pops both the receiver and the + # argument off the stack and pushes on the result. 
+ # + # ### Usage + # + # ~~~ruby + # "" << 2 + # ~~~ + # + class OptLTLT + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_ltlt, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_minus` is a specialization of the `opt_send_without_block` + # instruction that occurs when the `-` operator is used. There are fast + # paths for if both operands are integers or if both operands are floats. It + # pops both the receiver and the argument off the stack and pushes on the + # result. + # + # ### Usage + # + # ~~~ruby + # 3 - 2 + # ~~~ + # + class OptMinus + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_minus, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_mod` is a specialization of the `opt_send_without_block` instruction + # that occurs when the `%` operator is used. There are fast paths for if + # both operands are integers or if both operands are floats. It pops both + # the receiver and the argument off the stack and pushes on the result. + # + # ### Usage + # + # ~~~ruby + # 4 % 2 + # ~~~ + # + class OptMod + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_mod, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_mult` is a specialization of the `opt_send_without_block` instruction + # that occurs when the `*` operator is used. There are fast paths for if + # both operands are integers or floats. It pops both the receiver and the + # argument off the stack and pushes on the result. 
+ # + # ### Usage + # + # ~~~ruby + # 3 * 2 + # ~~~ + # + class OptMult + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_mult, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_neq` is an optimization that tests whether two values at the top of + # the stack are not equal by testing their equality and calling the `!` on + # the result. This allows `opt_neq` to use the fast paths optimized in + # `opt_eq` when both operands are Integers, Floats, Symbols, or Strings. It + # pops both the receiver and the argument off the stack and pushes on the + # result. + # + # ### Usage + # + # ~~~ruby + # 2 != 2 + # ~~~ + # + class OptNEq + attr_reader :eq_calldata, :neq_calldata + + def initialize(eq_calldata, neq_calldata) + @eq_calldata = eq_calldata + @neq_calldata = neq_calldata + end + + def to_a(_iseq) + [:opt_neq, eq_calldata.to_h, neq_calldata.to_h] + end + + def length + 3 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_newarray_max` is a specialization that occurs when the `max` method + # is called on an array literal. It pops the values of the array off the + # stack and pushes on the result. + # + # ### Usage + # + # ~~~ruby + # [1, 2, 3].max + # ~~~ + # + class OptNewArrayMax + attr_reader :number + + def initialize(number) + @number = number + end + + def to_a(_iseq) + [:opt_newarray_max, number] + end + + def length + 2 + end + + def pops + number + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_newarray_min` is a specialization that occurs when the `min` method + # is called on an array literal. It pops the values of the array off the + # stack and pushes on the result. 
+ # + # ### Usage + # + # ~~~ruby + # [1, 2, 3].min + # ~~~ + # + class OptNewArrayMin + attr_reader :number + + def initialize(number) + @number = number + end + + def to_a(_iseq) + [:opt_newarray_min, number] + end + + def length + 2 + end + + def pops + number + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_nil_p` is an optimization applied when the method `nil?` is called. + # It returns true immediately when the receiver is `nil` and defers to the + # `nil?` method in other cases. It pops the receiver off the stack and + # pushes on the result. + # + # ### Usage + # + # ~~~ruby + # "".nil? + # ~~~ + # + class OptNilP + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_nil_p, calldata.to_h] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_not` negates the value on top of the stack by calling the `!` method + # on it. It pops the receiver off the stack and pushes on the result. + # + # ### Usage + # + # ~~~ruby + # !true + # ~~~ + # + class OptNot + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_not, calldata.to_h] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_or` is a specialization of the `opt_send_without_block` instruction + # that occurs when the `|` operator is used. There is a fast path for if + # both operands are integers. It pops both the receiver and the argument off + # the stack and pushes on the result. 
+ # + # ### Usage + # + # ~~~ruby + # 2 | 3 + # ~~~ + # + class OptOr + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_or, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_plus` is a specialization of the `opt_send_without_block` instruction + # that occurs when the `+` operator is used. There are fast paths for if + # both operands are integers, floats, strings, or arrays. It pops both the + # receiver and the argument off the stack and pushes on the result. + # + # ### Usage + # + # ~~~ruby + # 2 + 3 + # ~~~ + # + class OptPlus + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_plus, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_regexpmatch2` is a specialization of the `opt_send_without_block` + # instruction that occurs when the `=~` operator is used. It pops both the + # receiver and the argument off the stack and pushes on the result. + # + # ### Usage + # + # ~~~ruby + # /a/ =~ "a" + # ~~~ + # + class OptRegExpMatch2 + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_regexpmatch2, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_send_without_block` is a specialization of the send instruction that + # occurs when a method is being called without a block. It pops the receiver + # and the arguments off the stack and pushes on the result. + # + # ### Usage + # + # ~~~ruby + # puts "Hello, world!" 
+ # ~~~ + # + class OptSendWithoutBlock + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_send_without_block, calldata.to_h] + end + + def length + 2 + end + + def pops + 1 + calldata.argc + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_size` is a specialization of `opt_send_without_block`, when the + # `size` method is called. There are fast paths when the receiver is either + # a string, hash, or array. It pops the receiver off the stack and pushes on + # the result. + # + # ### Usage + # + # ~~~ruby + # "".size + # ~~~ + # + class OptSize + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_size, calldata.to_h] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_str_freeze` pushes a frozen known string value with no interpolation + # onto the stack using the #freeze method. If the method gets overridden, + # this will fall back to a send. + # + # ### Usage + # + # ~~~ruby + # "hello".freeze + # ~~~ + # + class OptStrFreeze + attr_reader :object, :calldata + + def initialize(object, calldata) + @object = object + @calldata = calldata + end + + def to_a(_iseq) + [:opt_str_freeze, object, calldata.to_h] + end + + def length + 3 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_str_uminus` pushes a frozen known string value with no interpolation + # onto the stack. If the method gets overridden, this will fall back to a + # send. 
+ # + # ### Usage + # + # ~~~ruby + # -"string" + # ~~~ + # + class OptStrUMinus + attr_reader :object, :calldata + + def initialize(object, calldata) + @object = object + @calldata = calldata + end + + def to_a(_iseq) + [:opt_str_uminus, object, calldata.to_h] + end + + def length + 3 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_succ` is a specialization of the `opt_send_without_block` instruction + # when the method being called is `succ`. Fast paths exist when the receiver + # is either a String or a Fixnum. It pops the receiver off the stack and + # pushes on the result. + # + # ### Usage + # + # ~~~ruby + # "".succ + # ~~~ + # + class OptSucc + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_succ, calldata.to_h] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `pop` pops the top value off the stack. + # + # ### Usage + # + # ~~~ruby + # a ||= 2 + # ~~~ + # + class Pop + def to_a(_iseq) + [:pop] + end + + def length + 1 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `putnil` pushes a global nil object onto the stack. + # + # ### Usage + # + # ~~~ruby + # nil + # ~~~ + # + class PutNil + def to_a(_iseq) + [:putnil] + end + + def length + 1 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `putobject` pushes a known value onto the stack. + # + # ### Usage + # + # ~~~ruby + # 5 + # ~~~ + # + class PutObject + attr_reader :object + + def initialize(object) + @object = object + end + + def to_a(_iseq) + [:putobject, object] + end + + def length + 2 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `putobject_INT2FIX_0_` pushes 0 on the stack. It is a specialized + # instruction resulting from the operand unification optimization. It is + # equivalent to `putobject 0`. 
+ # + # ### Usage + # + # ~~~ruby + # 0 + # ~~~ + # + class PutObjectInt2Fix0 + def to_a(_iseq) + [:putobject_INT2FIX_0_] + end + + def length + 1 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `putobject_INT2FIX_1_` pushes 1 on the stack. It is a specialized + # instruction resulting from the operand unification optimization. It is + # equivalent to `putobject 1`. + # + # ### Usage + # + # ~~~ruby + # 1 + # ~~~ + # + class PutObjectInt2Fix1 + def to_a(_iseq) + [:putobject_INT2FIX_1_] + end + + def length + 1 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `putself` pushes the current value of self onto the stack. + # + # ### Usage + # + # ~~~ruby + # puts "Hello, world!" + # ~~~ + # + class PutSelf + def to_a(_iseq) + [:putself] + end + + def length + 1 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `putspecialobject` pushes one of three special objects onto the stack. + # These are either the VM core special object, the class base special + # object, or the constant base special object. + # + # ### Usage + # + # ~~~ruby + # alias foo bar + # ~~~ + # + class PutSpecialObject + OBJECT_VMCORE = 1 + OBJECT_CBASE = 2 + OBJECT_CONST_BASE = 3 + + attr_reader :object + + def initialize(object) + @object = object + end + + def to_a(_iseq) + [:putspecialobject, object] + end + + def length + 2 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `putstring` pushes an unfrozen string literal onto the stack. + # + # ### Usage + # + # ~~~ruby + # "foo" + # ~~~ + # + class PutString + attr_reader :object + + def initialize(object) + @object = object + end + + def to_a(_iseq) + [:putstring, object] + end + + def length + 2 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `send` invokes a method with an optional block. 
It pops its receiver and + # the arguments for the method off the stack and pushes the return value + # onto the stack. It has two arguments: the calldata for the call site and + # the optional block instruction sequence. + # + # ### Usage + # + # ~~~ruby + # "hello".tap { |i| p i } + # ~~~ + # + class Send + attr_reader :calldata, :block_iseq + + def initialize(calldata, block_iseq) + @calldata = calldata + @block_iseq = block_iseq + end + + def to_a(_iseq) + [:send, calldata.to_h, block_iseq&.to_a] + end + + def length + 3 + end + + def pops + argb = (calldata.flag?(CallData::CALL_ARGS_BLOCKARG) ? 1 : 0) + argb + calldata.argc + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `setblockparam` sets the value of a block local variable on a frame + # determined by the level and index arguments. The level is the number of + # frames back to look and the index is the index in the local table. It pops + # the value it is setting off the stack. + # + # ### Usage + # + # ~~~ruby + # def foo(&bar) + # bar = baz + # end + # ~~~ + # + class SetBlockParam + attr_reader :index, :level + + def initialize(index, level) + @index = index + @level = level + end + + def to_a(iseq) + current = iseq + level.times { current = current.parent_iseq } + [:setblockparam, current.local_table.offset(index), level] + end + + def length + 3 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `setclassvariable` looks for a class variable in the current class and + # sets its value to the value it pops off the top of the stack. It uses an + # inline cache to reduce the need to lookup the class variable in the class + # hierarchy every time. 
+ # + # ### Usage + # + # ~~~ruby + # @@class_variable = 1 + # ~~~ + # + class SetClassVariable + attr_reader :name, :cache + + def initialize(name, cache) + @name = name + @cache = cache + end + + def to_a(_iseq) + [:setclassvariable, name, cache] + end + + def length + 3 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `setconstant` pops two values off the stack: the value to set the + # constant to and the constant base to set it in. + # + # ### Usage + # + # ~~~ruby + # Constant = 1 + # ~~~ + # + class SetConstant + attr_reader :name + + def initialize(name) + @name = name + end + + def to_a(_iseq) + [:setconstant, name] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `setglobal` sets the value of a global variable to a value popped off the + # top of the stack. + # + # ### Usage + # + # ~~~ruby + # $global = 5 + # ~~~ + # + class SetGlobal + attr_reader :name + + def initialize(name) + @name = name + end + + def to_a(_iseq) + [:setglobal, name] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `setinstancevariable` pops a value off the top of the stack and then sets + # the instance variable associated with the instruction to that value. + # + # This instruction has two forms, but both have the same structure. Before + # Ruby 3.2, the inline cache corresponded to both the get and set + # instructions and could be shared. Since Ruby 3.2, it uses object shapes + # instead so the caches are unique per instruction. 
+ # + # ### Usage + # + # ~~~ruby + # @instance_variable = 1 + # ~~~ + # + class SetInstanceVariable + attr_reader :name, :cache + + def initialize(name, cache) + @name = name + @cache = cache + end + + def to_a(_iseq) + [:setinstancevariable, name, cache] + end + + def length + 3 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `setlocal` sets the value of a local variable on a frame determined by the + # level and index arguments. The level is the number of frames back to + # look and the index is the index in the local table. It pops the value it + # is setting off the stack. + # + # ### Usage + # + # ~~~ruby + # value = 5 + # tap { tap { value = 10 } } + # ~~~ + # + class SetLocal + attr_reader :index, :level + + def initialize(index, level) + @index = index + @level = level + end + + def to_a(iseq) + current = iseq + level.times { current = current.parent_iseq } + [:setlocal, current.local_table.offset(index), level] + end + + def length + 3 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `setlocal_WC_0` is a specialized version of the `setlocal` instruction. It + # sets the value of a local variable on the current frame to the value at + # the top of the stack as determined by the index given as its only + # argument. + # + # ### Usage + # + # ~~~ruby + # value = 5 + # ~~~ + # + class SetLocalWC0 + attr_reader :index + + def initialize(index) + @index = index + end + + def to_a(iseq) + [:setlocal_WC_0, iseq.local_table.offset(index)] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `setlocal_WC_1` is a specialized version of the `setlocal` instruction. It + # sets the value of a local variable on the parent frame to the value at the + # top of the stack as determined by the index given as its only argument. 
+ # + # ### Usage + # + # ~~~ruby + # value = 5 + # self.then { value = 10 } + # ~~~ + # + class SetLocalWC1 + attr_reader :index + + def initialize(index) + @index = index + end + + def to_a(iseq) + [:setlocal_WC_1, iseq.parent_iseq.local_table.offset(index)] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `setn` sets a value in the stack to a value popped off the top of the + # stack. It then pushes that value onto the top of the stack as well. + # + # ### Usage + # + # ~~~ruby + # {}[:key] = 'val' + # ~~~ + # + class SetN + attr_reader :number + + def initialize(number) + @number = number + end + + def to_a(_iseq) + [:setn, number] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `setspecial` pops a value off the top of the stack and sets a special + # local variable to that value. The special local variable is determined by + # the key given as its only argument. + # + # ### Usage + # + # ~~~ruby + # baz if (foo == 1) .. (bar == 1) + # ~~~ + # + class SetSpecial + attr_reader :key + + def initialize(key) + @key = key + end + + def to_a(_iseq) + [:setspecial, key] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `splatarray` coerces the array object at the top of the stack into Array + # by calling `to_a`. It pushes a duplicate of the array if there is a flag, + # and the original array if there isn't one. + # + # ### Usage + # + # ~~~ruby + # x = *(5) + # ~~~ + # + class SplatArray + attr_reader :flag + + def initialize(flag) + @flag = flag + end + + def to_a(_iseq) + [:splatarray, flag] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `swap` swaps the top two elements in the stack. + # + # ### TracePoint + # + # `swap` does not dispatch any events. 
+ # + # ### Usage + # + # ~~~ruby + # !!defined?([[]]) + # ~~~ + # + class Swap + def to_a(_iseq) + [:swap] + end + + def length + 1 + end + + def pops + 2 + end + + def pushes + 2 + end + end + + # ### Summary + # + # `topn` pushes a single value onto the stack that is a copy of the value + # within the stack that is `number` of slots down from the top. + # + # ### Usage + # + # ~~~ruby + # case 3 + # when 1..5 + # puts "foo" + # end + # ~~~ + # + class TopN + attr_reader :number + + def initialize(number) + @number = number + end + + def to_a(_iseq) + [:topn, number] + end + + def length + 2 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `toregexp` pops a number of values off the stack, combines them into a new + # regular expression, and pushes the new regular expression onto the stack. + # + # ### Usage + # + # ~~~ruby + # /foo #{bar}/ + # ~~~ + # + class ToRegExp + attr_reader :options, :length + + def initialize(options, length) + @options = options + @length = length + end + + def to_a(_iseq) + [:toregexp, options, length] + end + + def pops + length + end + + def pushes + 1 end end end diff --git a/lib/syntax_tree/yarv/legacy.rb b/lib/syntax_tree/yarv/legacy.rb new file mode 100644 index 00000000..45dfe768 --- /dev/null +++ b/lib/syntax_tree/yarv/legacy.rb @@ -0,0 +1,169 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This module contains the instructions that used to be a part of YARV but + # have been replaced or removed in more recent versions. + module Legacy + # ### Summary + # + # `getclassvariable` looks for a class variable in the current class and + # pushes its value onto the stack. 
+ # + # This version of the `getclassvariable` instruction is no longer used + # since in Ruby 3.0 it gained an inline cache. + # + # ### Usage + # + # ~~~ruby + # @@class_variable + # ~~~ + # + class GetClassVariable + attr_reader :name + + def initialize(name) + @name = name + end + + def to_a(_iseq) + [:getclassvariable, name] + end + + def length + 2 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_getinlinecache` is a wrapper around a series of `putobject` and + # `getconstant` instructions that allows skipping past them if the inline + # cache is currently set. It pushes the value of the cache onto the stack + # if it is set, otherwise it pushes `nil`. + # + # This instruction is no longer used since in Ruby 3.2 it was replaced by + # the consolidated `opt_getconstant_path` instruction. + # + # ### Usage + # + # ~~~ruby + # Constant + # ~~~ + # + class OptGetInlineCache + attr_reader :label, :cache + + def initialize(label, cache) + @label = label + @cache = cache + end + + def patch!(iseq) + @label = iseq.label + end + + def to_a(_iseq) + [:opt_getinlinecache, label, cache] + end + + def length + 3 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_setinlinecache` sets an inline cache for a constant lookup. It pops + # the value it should set off the top of the stack. It then pushes that + # value back onto the top of the stack. + # + # This instruction is no longer used since in Ruby 3.2 it was replaced by + # the consolidated `opt_getconstant_path` instruction.
+ # + # ### Usage + # + # ~~~ruby + # Constant + # ~~~ + # + class OptSetInlineCache + attr_reader :cache + + def initialize(cache) + @cache = cache + end + + def to_a(_iseq) + [:opt_setinlinecache, cache] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `setclassvariable` looks for a class variable in the current class and + # sets its value to the value it pops off the top of the stack. + # + # This version of the `setclassvariable` instruction is no longer used + # since in Ruby 3.0 it gained an inline cache. + # + # ### Usage + # + # ~~~ruby + # @@class_variable = 1 + # ~~~ + # + class SetClassVariable + attr_reader :name + + def initialize(name) + @name = name + end + + def to_a(_iseq) + [:setclassvariable, name] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 0 + end + end + end + end +end diff --git a/lib/syntax_tree/yarv/local_table.rb b/lib/syntax_tree/yarv/local_table.rb new file mode 100644 index 00000000..5eac346c --- /dev/null +++ b/lib/syntax_tree/yarv/local_table.rb @@ -0,0 +1,81 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This represents every local variable associated with an instruction + # sequence. There are two kinds of locals: plain locals that are what you + # expect, and block proxy locals, which represent local variables + # associated with blocks that were passed into the current instruction + # sequence. + class LocalTable + # A local representing a block passed into the current instruction + # sequence. + class BlockLocal + attr_reader :name + + def initialize(name) + @name = name + end + end + + # A regular local variable. + class PlainLocal + attr_reader :name + + def initialize(name) + @name = name + end + end + + # The result of looking up a local variable in the current local table. 
+ class Lookup + attr_reader :local, :index, :level + + def initialize(local, index, level) + @local = local + @index = index + @level = level + end + end + + attr_reader :locals + + def initialize + @locals = [] + end + + def find(name, level = 0) + index = locals.index { |local| local.name == name } + Lookup.new(locals[index], index, level) if index + end + + def has?(name) + locals.any? { |local| local.name == name } + end + + def names + locals.map(&:name) + end + + def size + locals.length + end + + # Add a BlockLocal to the local table. + def block(name) + locals << BlockLocal.new(name) unless has?(name) + end + + # Add a PlainLocal to the local table. + def plain(name) + locals << PlainLocal.new(name) unless has?(name) + end + + # This is the offset from the top of the stack where this local variable + # lives. + def offset(index) + size - (index - 3) - 1 + end + end + end +end diff --git a/test/compiler_test.rb b/test/compiler_test.rb index c2472432..6b185dea 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -485,13 +485,12 @@ def assert_compiles(source, **options) assert_equal( serialize_iseq(RubyVM::InstructionSequence.compile(source, **options)), - serialize_iseq(program.accept(Compiler.new(**options))) + serialize_iseq(program.accept(YARV::Compiler.new(**options))) ) end def assert_evaluates(expected, source, **options) - program = SyntaxTree.parse(source) - assert_equal expected, program.accept(Compiler.new(**options)).eval + assert_equal expected, YARV.compile(source, **options).eval end end end diff --git a/test/yarv_test.rb b/test/yarv_test.rb index 55cdb657..02514a93 100644 --- a/test/yarv_test.rb +++ b/test/yarv_test.rb @@ -47,8 +47,8 @@ def test_bf private def assert_disassembles(expected, source) - iseq = SyntaxTree.parse(source).accept(Compiler.new) - actual = Formatter.format(source, YARV::Disassembler.new(iseq).to_ruby) + ruby = YARV::Disassembler.new(YARV.compile(source)).to_ruby + actual = Formatter.format(source, ruby) 
assert_equal expected, actual end end From b6fb92ee9fe39bec7e547a307742c915e78bf5d4 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 22 Nov 2022 16:24:04 -0500 Subject: [PATCH 21/21] Get it working on TruffleRuby --- lib/syntax_tree/yarv/instruction_sequence.rb | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index c59d02c7..411f4692 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -13,11 +13,14 @@ class InstructionSequence # pass a serialized iseq to Ruby and have it return a # RubyVM::InstructionSequence object. ISEQ_LOAD = - Fiddle::Function.new( - Fiddle::Handle::DEFAULT["rb_iseq_load"], - [Fiddle::TYPE_VOIDP] * 3, - Fiddle::TYPE_VOIDP - ) + begin + Fiddle::Function.new( + Fiddle::Handle::DEFAULT["rb_iseq_load"], + [Fiddle::TYPE_VOIDP] * 3, + Fiddle::TYPE_VOIDP + ) + rescue NameError + end # This object is used to track the size of the stack at any given time. It # is effectively a mini symbolic interpreter. It's necessary because when @@ -141,6 +144,7 @@ def length end def eval + raise "Unsupported platform" if ISEQ_LOAD.nil? compiled = to_a # Temporary hack until we get these working.