diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index 73add469..ade9ff5e 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -29,8 +29,12 @@ require_relative "syntax_tree/index" require_relative "syntax_tree/yarv" +require_relative "syntax_tree/yarv/basic_block" require_relative "syntax_tree/yarv/bf" +require_relative "syntax_tree/yarv/calldata" require_relative "syntax_tree/yarv/compiler" +require_relative "syntax_tree/yarv/control_flow_graph" +require_relative "syntax_tree/yarv/data_flow_graph" require_relative "syntax_tree/yarv/decompiler" require_relative "syntax_tree/yarv/disassembler" require_relative "syntax_tree/yarv/instruction_sequence" diff --git a/lib/syntax_tree/yarv/basic_block.rb b/lib/syntax_tree/yarv/basic_block.rb new file mode 100644 index 00000000..6798a092 --- /dev/null +++ b/lib/syntax_tree/yarv/basic_block.rb @@ -0,0 +1,53 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This object represents a single basic block, wherein all contained + # instructions do not branch except for the last one. + class BasicBlock + # This is the unique identifier for this basic block. + attr_reader :id + + # This is the index into the list of instructions where this block starts. + attr_reader :block_start + + # This is the set of instructions that this block contains. + attr_reader :insns + + # This is an array of basic blocks that lead into this block. + attr_reader :incoming_blocks + + # This is an array of basic blocks that this block leads into. + attr_reader :outgoing_blocks + + def initialize(block_start, insns) + @id = "block_#{block_start}" + + @block_start = block_start + @insns = insns + + @incoming_blocks = [] + @outgoing_blocks = [] + end + + # Yield each instruction in this basic block along with its index from the + # original instruction sequence. + def each_with_length + return enum_for(:each_with_length) unless block_given? 
+ + length = block_start + insns.each do |insn| + yield insn, length + length += insn.length + end + end + + # This method is used to verify that the basic block is well formed. It + # checks that the only instruction in this basic block that branches is + # the last instruction. + def verify + insns[0...-1].each { |insn| raise unless insn.branch_targets.empty? } + end + end + end +end diff --git a/lib/syntax_tree/yarv/calldata.rb b/lib/syntax_tree/yarv/calldata.rb new file mode 100644 index 00000000..fadea61b --- /dev/null +++ b/lib/syntax_tree/yarv/calldata.rb @@ -0,0 +1,91 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This is an operand to various YARV instructions that represents the + # information about a specific call site. + class CallData + CALL_ARGS_SPLAT = 1 << 0 + CALL_ARGS_BLOCKARG = 1 << 1 + CALL_FCALL = 1 << 2 + CALL_VCALL = 1 << 3 + CALL_ARGS_SIMPLE = 1 << 4 + CALL_BLOCKISEQ = 1 << 5 + CALL_KWARG = 1 << 6 + CALL_KW_SPLAT = 1 << 7 + CALL_TAILCALL = 1 << 8 + CALL_SUPER = 1 << 9 + CALL_ZSUPER = 1 << 10 + CALL_OPT_SEND = 1 << 11 + CALL_KW_SPLAT_MUT = 1 << 12 + + attr_reader :method, :argc, :flags, :kw_arg + + def initialize( + method, + argc = 0, + flags = CallData::CALL_ARGS_SIMPLE, + kw_arg = nil + ) + @method = method + @argc = argc + @flags = flags + @kw_arg = kw_arg + end + + def flag?(mask) + (flags & mask) > 0 + end + + def to_h + result = { mid: method, flag: flags, orig_argc: argc } + result[:kw_arg] = kw_arg if kw_arg + result + end + + def inspect + names = [] + names << :ARGS_SPLAT if flag?(CALL_ARGS_SPLAT) + names << :ARGS_BLOCKARG if flag?(CALL_ARGS_BLOCKARG) + names << :FCALL if flag?(CALL_FCALL) + names << :VCALL if flag?(CALL_VCALL) + names << :ARGS_SIMPLE if flag?(CALL_ARGS_SIMPLE) + names << :BLOCKISEQ if flag?(CALL_BLOCKISEQ) + names << :KWARG if flag?(CALL_KWARG) + names << :KW_SPLAT if flag?(CALL_KW_SPLAT) + names << :TAILCALL if flag?(CALL_TAILCALL) + names << :SUPER if flag?(CALL_SUPER) + names << 
:ZSUPER if flag?(CALL_ZSUPER) + names << :OPT_SEND if flag?(CALL_OPT_SEND) + names << :KW_SPLAT_MUT if flag?(CALL_KW_SPLAT_MUT) + + parts = [] + parts << "mid:#{method}" if method + parts << "argc:#{argc}" + parts << "kw:[#{kw_arg.join(", ")}]" if kw_arg + parts << names.join("|") if names.any? + + "<calldata!#{parts.join(", ")}>" + end + + def self.from(serialized) + new( + serialized[:mid], + serialized[:orig_argc], + serialized[:flag], + serialized[:kw_arg] + ) + end + end + + # A convenience method for creating a CallData object. + def self.calldata( + method, + argc = 0, + flags = CallData::CALL_ARGS_SIMPLE, + kw_arg = nil + ) + CallData.new(method, argc, flags, kw_arg) + end + end +end diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb new file mode 100644 index 00000000..fb8f97f3 --- /dev/null +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -0,0 +1,184 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This class represents a control flow graph of a YARV instruction sequence. + # It constructs a graph of basic blocks that hold subsets of the list of + # instructions from the instruction sequence. + # + # You can use this class by calling the ::compile method and passing it a + # YARV instruction sequence. It will return a control flow graph object. + # + # iseq = RubyVM::InstructionSequence.compile("1 + 2") + # iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) + # cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) + # + class ControlFlowGraph + # This is the instruction sequence that this control flow graph + # corresponds to. + attr_reader :iseq + + # This is the list of instructions that this control flow graph contains. + # It is effectively the same as the list of instructions in the + # instruction sequence but with line numbers and events filtered out. + attr_reader :insns + + # This is the set of basic blocks that this control-flow graph contains. 
+ attr_reader :blocks + + def initialize(iseq, insns, blocks) + @iseq = iseq + @insns = insns + @blocks = blocks + end + + def disasm + fmt = Disassembler.new(iseq) + fmt.output.puts("== cfg: #{iseq.inspect}") + + blocks.each do |block| + fmt.output.puts(block.id) + fmt.with_prefix(" ") do |prefix| + unless block.incoming_blocks.empty? + from = block.incoming_blocks.map(&:id) + fmt.output.puts("#{prefix}== from: #{from.join(", ")}") + end + + fmt.format_insns!(block.insns, block.block_start) + + to = block.outgoing_blocks.map(&:id) + to << "leaves" if block.insns.last.leaves? + fmt.output.puts("#{prefix}== to: #{to.join(", ")}") + end + end + + fmt.string + end + + # This method is used to verify that the control flow graph is well + # formed. It does this by checking that each basic block is itself well + # formed. + def verify + blocks.each(&:verify) + end + + def self.compile(iseq) + Compiler.new(iseq).compile + end + + # This class is responsible for creating a control flow graph from the + # given instruction sequence. + class Compiler + # This is the instruction sequence that is being compiled. + attr_reader :iseq + + # This is a hash of indices in the YARV instruction sequence that point + # to their corresponding instruction. + attr_reader :insns + + # This is a hash of labels that point to their corresponding index into + # the YARV instruction sequence. Note that this is not the same as the + # index into the list of instructions on the instruction sequence + # object. Instead, this is the index into the C array, so it includes + # operands. + attr_reader :labels + + def initialize(iseq) + @iseq = iseq + + @insns = {} + @labels = {} + + length = 0 + iseq.insns.each do |insn| + case insn + when Instruction + @insns[length] = insn + length += insn.length + when InstructionSequence::Label + @labels[insn] = length + end + end + end + + # This method is used to compile the instruction sequence into a control + # flow graph. 
It returns an instance of ControlFlowGraph. + def compile + blocks = connect_basic_blocks(build_basic_blocks) + ControlFlowGraph.new(iseq, insns, blocks.values).tap(&:verify) + end + + private + + # Finds the indices of the instructions that start a basic block because + # they're either: + # + # * the start of an instruction sequence + # * the target of a branch + # * fallen through to from a branch + # + def find_basic_block_starts + block_starts = Set.new([0]) + + insns.each do |index, insn| + branch_targets = insn.branch_targets + + if branch_targets.any? + branch_targets.each do |branch_target| + block_starts.add(labels[branch_target]) + end + + block_starts.add(index + insn.length) if insn.falls_through? + end + end + + block_starts.to_a.sort + end + + # Builds up a set of basic blocks by iterating over the starts of each + # block. They are keyed by the index of their first instruction. + def build_basic_blocks + block_starts = find_basic_block_starts + + length = 0 + blocks = + iseq + .insns + .grep(Instruction) + .slice_after do |insn| + length += insn.length + block_starts.include?(length) + end + + block_starts + .zip(blocks) + .to_h do |block_start, block_insns| + [block_start, BasicBlock.new(block_start, block_insns)] + end + end + + # Connect the blocks by letting them know which blocks are incoming and + # outgoing from each block. + def connect_basic_blocks(blocks) + blocks.each do |block_start, block| + insn = block.insns.last + + insn.branch_targets.each do |branch_target| + block.outgoing_blocks << blocks.fetch(labels[branch_target]) + end + + if (insn.branch_targets.empty? && !insn.leaves?) || + insn.falls_through? 
+ fall_through_start = block_start + block.insns.sum(&:length) + block.outgoing_blocks << blocks.fetch(fall_through_start) + end + + block.outgoing_blocks.each do |outgoing_block| + outgoing_block.incoming_blocks << block + end + end + end + end + end + end +end diff --git a/lib/syntax_tree/yarv/data_flow_graph.rb b/lib/syntax_tree/yarv/data_flow_graph.rb new file mode 100644 index 00000000..614d1233 --- /dev/null +++ b/lib/syntax_tree/yarv/data_flow_graph.rb @@ -0,0 +1,225 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # Constructs a data-flow-graph of a YARV instruction sequence, via a + # control-flow-graph. Data flow is discovered locally and then globally. The + # graph only considers data flow through the stack - local variables and + # objects are considered fully escaped in this analysis. + class DataFlowGraph + # This object represents the flow of data between instructions. + class DataFlow + attr_reader :in + attr_reader :out + + def initialize + @in = [] + @out = [] + end + end + + attr_reader :cfg, :insn_flows, :block_flows + + def initialize(cfg, insn_flows, block_flows) + @cfg = cfg + @insn_flows = insn_flows + @block_flows = block_flows + end + + def disasm + fmt = Disassembler.new(cfg.iseq) + fmt.output.puts("== dfg: #{cfg.iseq.inspect}") + + cfg.blocks.each do |block| + fmt.output.puts(block.id) + fmt.with_prefix(" ") do |prefix| + unless block.incoming_blocks.empty? + from = block.incoming_blocks.map(&:id) + fmt.output.puts("#{prefix}== from: #{from.join(", ")}") + end + + block_flow = block_flows.fetch(block.id) + unless block_flow.in.empty? + fmt.output.puts("#{prefix}== in: #{block_flow.in.join(", ")}") + end + + fmt.format_insns!(block.insns, block.block_start) do |_, length| + insn_flow = insn_flows[length] + next if insn_flow.in.empty? && insn_flow.out.empty? + + fmt.output.print(" # ") + unless insn_flow.in.empty? 
+ fmt.output.print("in: #{insn_flow.in.join(", ")}") + fmt.output.print("; ") unless insn_flow.out.empty? + end + + unless insn_flow.out.empty? + fmt.output.print("out: #{insn_flow.out.join(", ")}") + end + end + + to = block.outgoing_blocks.map(&:id) + to << "leaves" if block.insns.last.leaves? + fmt.output.puts("#{prefix}== to: #{to.join(", ")}") + + unless block_flow.out.empty? + fmt.output.puts("#{prefix}== out: #{block_flow.out.join(", ")}") + end + end + end + + fmt.string + end + + # Verify that we constructed the data flow graph correctly. + def verify + # Check that the first block has no arguments. + raise unless block_flows.fetch(cfg.blocks.first.id).in.empty? + + # Check all control flow edges between blocks pass the right number of + # arguments. + cfg.blocks.each do |block| + block_flow = block_flows.fetch(block.id) + + if block.outgoing_blocks.empty? + # With no outgoing blocks, there should be no output arguments. + raise unless block_flow.out.empty? + else + # Check with outgoing blocks... + block.outgoing_blocks.each do |outgoing_block| + outgoing_flow = block_flows.fetch(outgoing_block.id) + + # The block should have as many output arguments as the + # outgoing block has input arguments. + raise unless block_flow.out.size == outgoing_flow.in.size + end + end + end + end + + def self.compile(cfg) + Compiler.new(cfg).compile + end + + # This class is responsible for creating a data flow graph from the given + # control flow graph. + class Compiler + # This is the control flow graph that is being compiled. + attr_reader :cfg + + # This data structure will hold the data flow between instructions + # within individual basic blocks. + attr_reader :insn_flows + + # This data structure will hold the data flow between basic blocks. 
+ attr_reader :block_flows + + def initialize(cfg) + @cfg = cfg + @insn_flows = cfg.insns.to_h { |length, _| [length, DataFlow.new] } + @block_flows = cfg.blocks.to_h { |block| [block.id, DataFlow.new] } + end + + def compile + find_local_flow + find_global_flow + DataFlowGraph.new(cfg, insn_flows, block_flows).tap(&:verify) + end + + private + + # Find the data flow within each basic block. Using an abstract stack, + # connect from consumers of data to the producers of that data. + def find_local_flow + cfg.blocks.each do |block| + block_flow = block_flows.fetch(block.id) + stack = [] + + # Go through each instruction in the block... + block.each_with_length do |insn, length| + insn_flow = insn_flows[length] + + # How many values will be missing from the local stack to run this + # instruction? + missing = insn.pops - stack.size + + # For every value the instruction pops off the stack... + insn.pops.times do + # Was the value it pops off from another basic block? + if stack.empty? + # This is a basic block argument. + missing -= 1 + name = :"in_#{missing}" + + insn_flow.in.unshift(name) + block_flow.in.unshift(name) + else + # Connect this consumer to the producer of the value. + insn_flow.in.unshift(stack.pop) + end + end + + # Record on our abstract stack that this instruction pushed + # this value onto the stack. + insn.pushes.times { stack << length } + end + + # Values that are left on the stack after going through all + # instructions are arguments to the basic block that we jump to. + stack.reverse_each.with_index do |producer, index| + block_flow.out << producer + insn_flows[producer].out << :"out_#{index}" + end + end + + # Go backwards and connect from producers to consumers. + cfg.insns.each_key do |length| + # For every instruction that produced a value used in this + # instruction... + insn_flows[length].in.each do |producer| + # If it's actually another instruction and not a basic block + # argument... 
+ if producer.is_a?(Integer) + # Record in the producing instruction that it produces a value + # used by this instruction. + insn_flows[producer].out << length + end + end + end + end + + # Find the data that flows between basic blocks. + def find_global_flow + stack = [*cfg.blocks] + + until stack.empty? + block = stack.pop + block_flow = block_flows.fetch(block.id) + + block.incoming_blocks.each do |incoming_block| + incoming_flow = block_flows.fetch(incoming_block.id) + + # Does a predecessor block have fewer outputs than the successor + # has inputs? + if incoming_flow.out.size < block_flow.in.size + # If so then add arguments to pass data through from the + # incoming block's incoming blocks. + (block_flow.in.size - incoming_flow.out.size).times do |index| + name = :"pass_#{index}" + + incoming_flow.in.unshift(name) + incoming_flow.out.unshift(name) + end + + # Since we modified the incoming block, add it back to the stack + # so it'll be considered as an outgoing block again, and + # propagate the global data flow back up the control flow graph. 
+ stack << incoming_block + end + end + end + end + end + end + end +end diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb index d303bcb7..ad66d0bf 100644 --- a/lib/syntax_tree/yarv/disassembler.rb +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -4,15 +4,16 @@ module SyntaxTree module YARV class Disassembler attr_reader :output, :queue + attr_reader :current_prefix attr_accessor :current_iseq - def initialize + def initialize(current_iseq = nil) @output = StringIO.new @queue = [] @current_prefix = "" - @current_iseq = nil + @current_iseq = current_iseq end ######################################################################## @@ -20,30 +21,7 @@ def initialize ######################################################################## def calldata(value) - flag_names = [] - flag_names << :ARGS_SPLAT if value.flag?(CallData::CALL_ARGS_SPLAT) - if value.flag?(CallData::CALL_ARGS_BLOCKARG) - flag_names << :ARGS_BLOCKARG - end - flag_names << :FCALL if value.flag?(CallData::CALL_FCALL) - flag_names << :VCALL if value.flag?(CallData::CALL_VCALL) - flag_names << :ARGS_SIMPLE if value.flag?(CallData::CALL_ARGS_SIMPLE) - flag_names << :BLOCKISEQ if value.flag?(CallData::CALL_BLOCKISEQ) - flag_names << :KWARG if value.flag?(CallData::CALL_KWARG) - flag_names << :KW_SPLAT if value.flag?(CallData::CALL_KW_SPLAT) - flag_names << :TAILCALL if value.flag?(CallData::CALL_TAILCALL) - flag_names << :SUPER if value.flag?(CallData::CALL_SUPER) - flag_names << :ZSUPER if value.flag?(CallData::CALL_ZSUPER) - flag_names << :OPT_SEND if value.flag?(CallData::CALL_OPT_SEND) - flag_names << :KW_SPLAT_MUT if value.flag?(CallData::CALL_KW_SPLAT_MUT) - - parts = [] - parts << "mid:#{value.method}" if value.method - parts << "argc:#{value.argc}" - parts << "kw:[#{value.kw_arg.join(", ")}]" if value.kw_arg - parts << flag_names.join("|") if flag_names.any? 
- - "" + value.inspect end def enqueue(iseq) @@ -97,71 +75,25 @@ def object(value) end ######################################################################## - # Main entrypoint + # Entrypoints ######################################################################## + def string + output.string + end + def format! while (@current_iseq = queue.shift) output << "\n" if output.pos > 0 format_iseq(@current_iseq) end - - output.string end - private - - def format_iseq(iseq) - output << "#{current_prefix}== disasm: " - output << "#:1 " - - location = Location.fixed(line: iseq.line, char: 0, column: 0) - output << "(#{location.start_line},#{location.start_column})-" - output << "(#{location.end_line},#{location.end_column})" - output << "> " - - if iseq.catch_table.any? - output << "(catch: TRUE)\n" - output << "#{current_prefix}== catch table\n" - - with_prefix("#{current_prefix}| ") do - iseq.catch_table.each do |entry| - case entry - when InstructionSequence::CatchBreak - output << "#{current_prefix}catch type: break\n" - format_iseq(entry.iseq) - when InstructionSequence::CatchNext - output << "#{current_prefix}catch type: next\n" - when InstructionSequence::CatchRedo - output << "#{current_prefix}catch type: redo\n" - when InstructionSequence::CatchRescue - output << "#{current_prefix}catch type: rescue\n" - format_iseq(entry.iseq) - end - end - end - - output << "#{current_prefix}|#{"-" * 72}\n" - else - output << "(catch: FALSE)\n" - end - - if (local_table = iseq.local_table) && !local_table.empty? 
- output << "#{current_prefix}local table (size: #{local_table.size})\n" - - locals = - local_table.locals.each_with_index.map do |local, index| - "[%2d] %s@%d" % [local_table.offset(index), local.name, index] - end - - output << "#{current_prefix}#{locals.join(" ")}\n" - end - - length = 0 + def format_insns!(insns, length = 0) events = [] lines = [] - iseq.insns.each do |insn| + insns.each do |insn| case insn when Integer lines << insn @@ -191,6 +123,10 @@ def format_iseq(iseq) events.clear end + # A hook here to allow for custom formatting of instructions after + # the main body has been processed. + yield insn, length if block_given? + output << "\n" length += insn.length end @@ -202,11 +138,56 @@ def with_prefix(value) begin @current_prefix = value - yield + yield value ensure @current_prefix = previous end end + + private + + def format_iseq(iseq) + output << "#{current_prefix}== disasm: #{iseq.inspect} " + + if iseq.catch_table.any? + output << "(catch: TRUE)\n" + output << "#{current_prefix}== catch table\n" + + with_prefix("#{current_prefix}| ") do + iseq.catch_table.each do |entry| + case entry + when InstructionSequence::CatchBreak + output << "#{current_prefix}catch type: break\n" + format_iseq(entry.iseq) + when InstructionSequence::CatchNext + output << "#{current_prefix}catch type: next\n" + when InstructionSequence::CatchRedo + output << "#{current_prefix}catch type: redo\n" + when InstructionSequence::CatchRescue + output << "#{current_prefix}catch type: rescue\n" + format_iseq(entry.iseq) + end + end + end + + output << "#{current_prefix}|#{"-" * 72}\n" + else + output << "(catch: FALSE)\n" + end + + if (local_table = iseq.local_table) && !local_table.empty? 
+ output << "#{current_prefix}local table (size: #{local_table.size})\n" + + locals = + local_table.locals.each_with_index.map do |local, index| + "[%2d] %s@%d" % [local_table.offset(index), local.name, index] + end + + output << "#{current_prefix}#{locals.join(" ")}\n" + end + + format_insns!(iseq.insns) + end end end end diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 6aa7279e..45fc6121 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -270,9 +270,14 @@ def to_a end def disasm - disassembler = Disassembler.new - disassembler.enqueue(self) - disassembler.format! + fmt = Disassembler.new + fmt.enqueue(self) + fmt.format! + fmt.string + end + + def inspect + "#:1 (#{line},0)-(#{line},0)>" end # This method converts our linked list of instructions into a final array diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index bba06f8d..9bd8f0cd 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -2,65 +2,48 @@ module SyntaxTree module YARV - # This is an operand to various YARV instructions that represents the - # information about a specific call site. - class CallData - CALL_ARGS_SPLAT = 1 << 0 - CALL_ARGS_BLOCKARG = 1 << 1 - CALL_FCALL = 1 << 2 - CALL_VCALL = 1 << 3 - CALL_ARGS_SIMPLE = 1 << 4 - CALL_BLOCKISEQ = 1 << 5 - CALL_KWARG = 1 << 6 - CALL_KW_SPLAT = 1 << 7 - CALL_TAILCALL = 1 << 8 - CALL_SUPER = 1 << 9 - CALL_ZSUPER = 1 << 10 - CALL_OPT_SEND = 1 << 11 - CALL_KW_SPLAT_MUT = 1 << 12 - - attr_reader :method, :argc, :flags, :kw_arg - - def initialize( - method, - argc = 0, - flags = CallData::CALL_ARGS_SIMPLE, - kw_arg = nil - ) - @method = method - @argc = argc - @flags = flags - @kw_arg = kw_arg + # This is a base class for all YARV instructions. It provides a few + # convenience methods for working with instructions. 
+ class Instruction + # This method creates an instruction that represents the canonical + # (non-specialized) form of this instruction. If this instruction is not + # a specialized instruction, then this method returns `self`. + def canonical + self end - def flag?(mask) - (flags & mask) > 0 + # This returns the size of the instruction in terms of the number of slots + # it occupies in the instruction sequence. Effectively this is 1 plus the + # number of operands. + def length + 1 end - def to_h - result = { mid: method, flag: flags, orig_argc: argc } - result[:kw_arg] = kw_arg if kw_arg - result + # This returns the number of values that are pushed onto the stack. + def pushes + 0 end - def self.from(serialized) - new( - serialized[:mid], - serialized[:orig_argc], - serialized[:flag], - serialized[:kw_arg] - ) + # This returns the number of values that are popped off the stack. + def pops + 0 + end + + # This returns an array of labels. + def branch_targets + [] end - end - # A convenience method for creating a CallData object. - def self.calldata( - method, - argc = 0, - flags = CallData::CALL_ARGS_SIMPLE, - kw_arg = nil - ) - CallData.new(method, argc, flags, kw_arg) + # Whether or not this instruction leaves the current frame. + def leaves? + false + end + + # Whether or not this instruction falls through to the next instruction if + # its branching fails. + def falls_through? 
+ false + end end # ### Summary @@ -76,7 +59,7 @@ def self.calldata( # x[0] # ~~~ # - class AdjustStack + class AdjustStack < Instruction attr_reader :number def initialize(number) @@ -107,14 +90,6 @@ def pops number end - def pushes - 0 - end - - def canonical - self - end - def call(vm) vm.pop(number) end @@ -138,7 +113,7 @@ def call(vm) # "#{5}" # ~~~ # - class AnyToString + class AnyToString < Instruction def disasm(fmt) fmt.instruction("anytostring") end @@ -155,10 +130,6 @@ def ==(other) other.is_a?(AnyToString) end - def length - 1 - end - def pops 2 end @@ -167,10 +138,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) original, value = vm.pop(2) @@ -198,7 +165,7 @@ def call(vm) # puts x # ~~~ # - class BranchIf + class BranchIf < Instruction attr_reader :label def initialize(label) @@ -229,16 +196,16 @@ def pops 1 end - def pushes - 0 + def call(vm) + vm.jump(label) if vm.pop end - def canonical - self + def branch_targets + [label] end - def call(vm) - vm.jump(label) if vm.pop + def falls_through? + true end end @@ -259,7 +226,7 @@ def call(vm) # end # ~~~ # - class BranchNil + class BranchNil < Instruction attr_reader :label def initialize(label) @@ -290,16 +257,16 @@ def pops 1 end - def pushes - 0 + def call(vm) + vm.jump(label) if vm.pop.nil? end - def canonical - self + def branch_targets + [label] end - def call(vm) - vm.jump(label) if vm.pop.nil? + def falls_through? + true end end @@ -319,7 +286,7 @@ def call(vm) # end # ~~~ # - class BranchUnless + class BranchUnless < Instruction attr_reader :label def initialize(label) @@ -350,16 +317,16 @@ def pops 1 end - def pushes - 0 + def call(vm) + vm.jump(label) unless vm.pop end - def canonical - self + def branch_targets + [label] end - def call(vm) - vm.jump(label) unless vm.pop + def falls_through? 
+ true end end @@ -382,7 +349,7 @@ def call(vm) # evaluate(value: 3) # ~~~ # - class CheckKeyword + class CheckKeyword < Instruction attr_reader :keyword_bits_index, :keyword_index def initialize(keyword_bits_index, keyword_index) @@ -419,18 +386,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.local_get(keyword_bits_index, 0)[keyword_index]) end @@ -448,7 +407,7 @@ def call(vm) # foo in Foo # ~~~ # - class CheckMatch + class CheckMatch < Instruction VM_CHECKMATCH_TYPE_WHEN = 1 VM_CHECKMATCH_TYPE_CASE = 2 VM_CHECKMATCH_TYPE_RESCUE = 3 @@ -489,10 +448,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) target, pattern = vm.pop(2) @@ -536,7 +491,7 @@ def check?(pattern, target) # foo in [bar] # ~~~ # - class CheckType + class CheckType < Instruction TYPE_OBJECT = 0x01 TYPE_CLASS = 0x02 TYPE_MODULE = 0x03 @@ -643,10 +598,6 @@ def pushes 2 end - def canonical - self - end - def call(vm) object = vm.pop result = @@ -713,7 +664,7 @@ def call(vm) # [1, *2] # ~~~ # - class ConcatArray + class ConcatArray < Instruction def disasm(fmt) fmt.instruction("concatarray") end @@ -730,10 +681,6 @@ def ==(other) other.is_a?(ConcatArray) end - def length - 1 - end - def pops 2 end @@ -742,10 +689,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) left, right = vm.pop(2) vm.push([*left, *right]) @@ -767,7 +710,7 @@ def call(vm) # "#{5}" # ~~~ # - class ConcatStrings + class ConcatStrings < Instruction attr_reader :number def initialize(number) @@ -802,10 +745,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop(number).join) end @@ -826,7 +765,7 @@ def call(vm) # end # ~~~ # - class DefineClass + class DefineClass < Instruction TYPE_CLASS = 0 TYPE_SINGLETON_CLASS = 1 TYPE_MODULE = 2 @@ -874,10 +813,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) object, superclass = vm.pop(2) @@ -914,7 +849,7 @@ def call(vm) # defined?(x) # ~~~ # - class 
Defined + class Defined < Instruction TYPE_NIL = 1 TYPE_IVAR = 2 TYPE_LVAR = 3 @@ -1011,10 +946,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) object = vm.pop @@ -1069,7 +1000,7 @@ def call(vm) # def value = "value" # ~~~ # - class DefineMethod + class DefineMethod < Instruction attr_reader :method_name, :method_iseq def initialize(method_name, method_iseq) @@ -1102,18 +1033,6 @@ def length 3 end - def pops - 0 - end - - def pushes - 0 - end - - def canonical - self - end - def call(vm) name = method_name nesting = vm.frame.nesting @@ -1150,7 +1069,7 @@ def call(vm) # def self.value = "value" # ~~~ # - class DefineSMethod + class DefineSMethod < Instruction attr_reader :method_name, :method_iseq def initialize(method_name, method_iseq) @@ -1187,14 +1106,6 @@ def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) name = method_name nesting = vm.frame.nesting @@ -1227,7 +1138,7 @@ def call(vm) # $global = 5 # ~~~ # - class Dup + class Dup < Instruction def disasm(fmt) fmt.instruction("dup") end @@ -1244,10 +1155,6 @@ def ==(other) other.is_a?(Dup) end - def length - 1 - end - def pops 1 end @@ -1256,10 +1163,6 @@ def pushes 2 end - def canonical - self - end - def call(vm) vm.push(vm.stack.last.dup) end @@ -1275,7 +1178,7 @@ def call(vm) # [true] # ~~~ # - class DupArray + class DupArray < Instruction attr_reader :object def initialize(object) @@ -1302,18 +1205,10 @@ def length 2 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(object.dup) end @@ -1329,7 +1224,7 @@ def call(vm) # { a: 1 } # ~~~ # - class DupHash + class DupHash < Instruction attr_reader :object def initialize(object) @@ -1356,18 +1251,10 @@ def length 2 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(object.dup) end @@ -1383,7 +1270,7 @@ def call(vm) # Object::X ||= true # ~~~ # - class DupN + class DupN < Instruction attr_reader :number def initialize(number) 
@@ -1410,18 +1297,10 @@ def length 2 end - def pops - 0 - end - def pushes number end - def canonical - self - end - def call(vm) values = vm.pop(number) vm.push(*values) @@ -1441,7 +1320,7 @@ def call(vm) # x, = [true, false, nil] # ~~~ # - class ExpandArray + class ExpandArray < Instruction attr_reader :number, :flags def initialize(number, flags) @@ -1478,10 +1357,6 @@ def pushes number end - def canonical - self - end - def call(vm) object = vm.pop object = @@ -1539,7 +1414,7 @@ def call(vm) # end # ~~~ # - class GetBlockParam + class GetBlockParam < Instruction attr_reader :index, :level def initialize(index, level) @@ -1570,18 +1445,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.local_get(index, level)) end @@ -1602,7 +1469,7 @@ def call(vm) # end # ~~~ # - class GetBlockParamProxy + class GetBlockParamProxy < Instruction attr_reader :index, :level def initialize(index, level) @@ -1636,18 +1503,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.local_get(index, level)) end @@ -1665,7 +1524,7 @@ def call(vm) # @@class_variable # ~~~ # - class GetClassVariable + class GetClassVariable < Instruction attr_reader :name, :cache def initialize(name, cache) @@ -1697,18 +1556,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) clazz = vm.frame._self clazz = clazz.class unless clazz.is_a?(Class) @@ -1728,7 +1579,7 @@ def call(vm) # Constant # ~~~ # - class GetConstant + class GetConstant < Instruction attr_reader :name def initialize(name) @@ -1763,10 +1614,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) const_base, allow_nil = vm.pop(2) @@ -1798,7 +1645,7 @@ def call(vm) # $$ # ~~~ # - class GetGlobal + class GetGlobal < Instruction attr_reader :name def initialize(name) @@ -1825,18 +1672,10 @@ def length 2 end - def pops - 0 - end - def pushes 1 end - def 
canonical - self - end - def call(vm) # Evaluating the name of the global variable because there isn't a # reflection API for global variables. @@ -1861,7 +1700,7 @@ def call(vm) # @instance_variable # ~~~ # - class GetInstanceVariable + class GetInstanceVariable < Instruction attr_reader :name, :cache def initialize(name, cache) @@ -1893,18 +1732,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) method = Object.instance_method(:instance_variable_get) vm.push(method.bind(vm.frame._self).call(name)) @@ -1925,7 +1756,7 @@ def call(vm) # tap { tap { value } } # ~~~ # - class GetLocal + class GetLocal < Instruction attr_reader :index, :level def initialize(index, level) @@ -1955,18 +1786,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.local_get(index, level)) end @@ -1985,7 +1808,7 @@ def call(vm) # value # ~~~ # - class GetLocalWC0 + class GetLocalWC0 < Instruction attr_reader :index def initialize(index) @@ -2012,10 +1835,6 @@ def length 2 end - def pops - 0 - end - def pushes 1 end @@ -2042,7 +1861,7 @@ def call(vm) # self.then { value } # ~~~ # - class GetLocalWC1 + class GetLocalWC1 < Instruction attr_reader :index def initialize(index) @@ -2069,10 +1888,6 @@ def length 2 end - def pops - 0 - end - def pushes 1 end @@ -2096,7 +1911,7 @@ def call(vm) # 1 if (a == 1) .. 
(b == 2) # ~~~ # - class GetSpecial + class GetSpecial < Instruction SVAR_LASTLINE = 0 # $_ SVAR_BACKREF = 1 # $~ SVAR_FLIPFLOP_START = 2 # flipflop @@ -2128,18 +1943,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) case key when SVAR_LASTLINE @@ -2163,7 +1970,7 @@ def call(vm) # :"#{"foo"}" # ~~~ # - class Intern + class Intern < Instruction def disasm(fmt) fmt.instruction("intern") end @@ -2180,10 +1987,6 @@ def ==(other) other.is_a?(Intern) end - def length - 1 - end - def pops 1 end @@ -2192,10 +1995,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop.to_sym) end @@ -2215,7 +2014,7 @@ def call(vm) # end # ~~~ # - class InvokeBlock + class InvokeBlock < Instruction attr_reader :calldata def initialize(calldata) @@ -2250,10 +2049,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.frame_yield.block.call(*vm.pop(calldata.argc))) end @@ -2273,7 +2068,7 @@ def call(vm) # end # ~~~ # - class InvokeSuper + class InvokeSuper < Instruction attr_reader :calldata, :block_iseq def initialize(calldata, block_iseq) @@ -2302,10 +2097,6 @@ def ==(other) other.block_iseq == block_iseq end - def length - 1 - end - def pops argb = (calldata.flag?(CallData::CALL_ARGS_BLOCKARG) ? 
1 : 0) argb + calldata.argc + 1 @@ -2315,10 +2106,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) block = if (iseq = block_iseq) @@ -2358,7 +2145,7 @@ def call(vm) # end # ~~~ # - class Jump + class Jump < Instruction attr_reader :label def initialize(label) @@ -2385,21 +2172,13 @@ def length 2 end - def pops - 0 - end - - def pushes - 0 - end - - def canonical - self - end - def call(vm) vm.jump(label) end + + def branch_targets + [label] + end end # ### Summary @@ -2412,7 +2191,7 @@ def call(vm) # ;; # ~~~ # - class Leave + class Leave < Instruction def disasm(fmt) fmt.instruction("leave") end @@ -2429,10 +2208,6 @@ def ==(other) other.is_a?(Leave) end - def length - 1 - end - def pops 1 end @@ -2443,13 +2218,13 @@ def pushes 0 end - def canonical - self - end - def call(vm) vm.leave end + + def leaves? + true + end end # ### Summary @@ -2464,7 +2239,7 @@ def call(vm) # ["string"] # ~~~ # - class NewArray + class NewArray < Instruction attr_reader :number def initialize(number) @@ -2499,10 +2274,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop(number)) end @@ -2520,7 +2291,7 @@ def call(vm) # ["string", **{ foo: "bar" }] # ~~~ # - class NewArrayKwSplat + class NewArrayKwSplat < Instruction attr_reader :number def initialize(number) @@ -2555,10 +2326,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop(number)) end @@ -2578,7 +2345,7 @@ def call(vm) # end # ~~~ # - class NewHash + class NewHash < Instruction attr_reader :number def initialize(number) @@ -2613,10 +2380,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop(number).each_slice(2).to_h) end @@ -2637,7 +2400,7 @@ def call(vm) # p (x..y), (x...y) # ~~~ # - class NewRange + class NewRange < Instruction attr_reader :exclude_end def initialize(exclude_end) @@ -2672,10 +2435,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(Range.new(*vm.pop(2), exclude_end == 1)) end @@ -2692,7 
+2451,7 @@ def call(vm) # raise rescue true # ~~~ # - class Nop + class Nop < Instruction def disasm(fmt) fmt.instruction("nop") end @@ -2709,22 +2468,6 @@ def ==(other) other.is_a?(Nop) end - def length - 1 - end - - def pops - 0 - end - - def pushes - 0 - end - - def canonical - self - end - def call(vm) end end @@ -2743,7 +2486,7 @@ def call(vm) # "#{5}" # ~~~ # - class ObjToString + class ObjToString < Instruction attr_reader :calldata def initialize(calldata) @@ -2778,10 +2521,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop.to_s) end @@ -2800,7 +2539,7 @@ def call(vm) # END { puts "END" } # ~~~ # - class Once + class Once < Instruction attr_reader :iseq, :cache def initialize(iseq, cache) @@ -2829,18 +2568,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) return if @executed vm.push(vm.run_block_frame(iseq, vm.frame)) @@ -2861,7 +2592,7 @@ def call(vm) # 2 & 3 # ~~~ # - class OptAnd + class OptAnd < Instruction attr_reader :calldata def initialize(calldata) @@ -2917,7 +2648,7 @@ def call(vm) # 7[2] # ~~~ # - class OptAref + class OptAref < Instruction attr_reader :calldata def initialize(calldata) @@ -2974,7 +2705,7 @@ def call(vm) # { 'test' => true }['test'] # ~~~ # - class OptArefWith + class OptArefWith < Instruction attr_reader :object, :calldata def initialize(object, calldata) @@ -3014,10 +2745,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop[object]) end @@ -3036,7 +2763,7 @@ def call(vm) # {}[:key] = value # ~~~ # - class OptAset + class OptAset < Instruction attr_reader :calldata def initialize(calldata) @@ -3092,7 +2819,7 @@ def call(vm) # {}["key"] = value # ~~~ # - class OptAsetWith + class OptAsetWith < Instruction attr_reader :object, :calldata def initialize(object, calldata) @@ -3132,10 +2859,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) hash, value = vm.pop(2) vm.push(hash[object] = value) @@ -3165,7 
+2888,7 @@ def call(vm) # end # ~~~ # - class OptCaseDispatch + class OptCaseDispatch < Instruction attr_reader :case_dispatch_hash, :else_label def initialize(case_dispatch_hash, else_label) @@ -3206,16 +2929,16 @@ def pops 1 end - def pushes - 0 + def call(vm) + vm.jump(case_dispatch_hash.fetch(vm.pop, else_label)) end - def canonical - self + def branch_targets + case_dispatch_hash.values.push(else_label) end - def call(vm) - vm.jump(case_dispatch_hash.fetch(vm.pop, else_label)) + def falls_through? + true end end @@ -3232,7 +2955,7 @@ def call(vm) # 2 / 3 # ~~~ # - class OptDiv + class OptDiv < Instruction attr_reader :calldata def initialize(calldata) @@ -3288,7 +3011,7 @@ def call(vm) # "".empty? # ~~~ # - class OptEmptyP + class OptEmptyP < Instruction attr_reader :calldata def initialize(calldata) @@ -3345,7 +3068,7 @@ def call(vm) # 2 == 2 # ~~~ # - class OptEq + class OptEq < Instruction attr_reader :calldata def initialize(calldata) @@ -3402,7 +3125,7 @@ def call(vm) # 4 >= 3 # ~~~ # - class OptGE + class OptGE < Instruction attr_reader :calldata def initialize(calldata) @@ -3458,7 +3181,7 @@ def call(vm) # ::Object # ~~~ # - class OptGetConstantPath + class OptGetConstantPath < Instruction attr_reader :names def initialize(names) @@ -3486,18 +3209,10 @@ def length 2 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) current = vm.frame._self current = current.class unless current.is_a?(Class) @@ -3523,7 +3238,7 @@ def call(vm) # 4 > 3 # ~~~ # - class OptGT + class OptGT < Instruction attr_reader :calldata def initialize(calldata) @@ -3580,7 +3295,7 @@ def call(vm) # 3 <= 4 # ~~~ # - class OptLE + class OptLE < Instruction attr_reader :calldata def initialize(calldata) @@ -3637,7 +3352,7 @@ def call(vm) # "".length # ~~~ # - class OptLength + class OptLength < Instruction attr_reader :calldata def initialize(calldata) @@ -3694,7 +3409,7 @@ def call(vm) # 3 < 4 # ~~~ # - class OptLT + class OptLT < Instruction 
attr_reader :calldata def initialize(calldata) @@ -3751,7 +3466,7 @@ def call(vm) # "" << 2 # ~~~ # - class OptLTLT + class OptLTLT < Instruction attr_reader :calldata def initialize(calldata) @@ -3809,7 +3524,7 @@ def call(vm) # 3 - 2 # ~~~ # - class OptMinus + class OptMinus < Instruction attr_reader :calldata def initialize(calldata) @@ -3866,7 +3581,7 @@ def call(vm) # 4 % 2 # ~~~ # - class OptMod + class OptMod < Instruction attr_reader :calldata def initialize(calldata) @@ -3923,7 +3638,7 @@ def call(vm) # 3 * 2 # ~~~ # - class OptMult + class OptMult < Instruction attr_reader :calldata def initialize(calldata) @@ -3982,7 +3697,7 @@ def call(vm) # 2 != 2 # ~~~ # - class OptNEq + class OptNEq < Instruction attr_reader :eq_calldata, :neq_calldata def initialize(eq_calldata, neq_calldata) @@ -4022,10 +3737,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) receiver, argument = vm.pop(2) vm.push(receiver != argument) @@ -4044,7 +3755,7 @@ def call(vm) # [a, b, c].max # ~~~ # - class OptNewArrayMax + class OptNewArrayMax < Instruction attr_reader :number def initialize(number) @@ -4079,10 +3790,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop(number).max) end @@ -4100,7 +3807,7 @@ def call(vm) # [a, b, c].min # ~~~ # - class OptNewArrayMin + class OptNewArrayMin < Instruction attr_reader :number def initialize(number) @@ -4135,10 +3842,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop(number).min) end @@ -4157,7 +3860,7 @@ def call(vm) # "".nil? 
# ~~~ # - class OptNilP + class OptNilP < Instruction attr_reader :calldata def initialize(calldata) @@ -4212,7 +3915,7 @@ def call(vm) # !true # ~~~ # - class OptNot + class OptNot < Instruction attr_reader :calldata def initialize(calldata) @@ -4269,7 +3972,7 @@ def call(vm) # 2 | 3 # ~~~ # - class OptOr + class OptOr < Instruction attr_reader :calldata def initialize(calldata) @@ -4326,7 +4029,7 @@ def call(vm) # 2 + 3 # ~~~ # - class OptPlus + class OptPlus < Instruction attr_reader :calldata def initialize(calldata) @@ -4382,7 +4085,7 @@ def call(vm) # /a/ =~ "a" # ~~~ # - class OptRegExpMatch2 + class OptRegExpMatch2 < Instruction attr_reader :calldata def initialize(calldata) @@ -4438,7 +4141,7 @@ def call(vm) # puts "Hello, world!" # ~~~ # - class OptSendWithoutBlock + class OptSendWithoutBlock < Instruction attr_reader :calldata def initialize(calldata) @@ -4495,7 +4198,7 @@ def call(vm) # "".size # ~~~ # - class OptSize + class OptSize < Instruction attr_reader :calldata def initialize(calldata) @@ -4551,7 +4254,7 @@ def call(vm) # "hello".freeze # ~~~ # - class OptStrFreeze + class OptStrFreeze < Instruction attr_reader :object, :calldata def initialize(object, calldata) @@ -4583,18 +4286,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(object.freeze) end @@ -4612,7 +4307,7 @@ def call(vm) # -"string" # ~~~ # - class OptStrUMinus + class OptStrUMinus < Instruction attr_reader :object, :calldata def initialize(object, calldata) @@ -4644,18 +4339,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(-object) end @@ -4674,7 +4361,7 @@ def call(vm) # "".succ # ~~~ # - class OptSucc + class OptSucc < Instruction attr_reader :calldata def initialize(calldata) @@ -4728,7 +4415,7 @@ def call(vm) # a ||= 2 # ~~~ # - class Pop + class Pop < Instruction def disasm(fmt) fmt.instruction("pop") end @@ -4745,22 +4432,10 @@ def ==(other) 
other.is_a?(Pop) end - def length - 1 - end - def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) vm.pop end @@ -4776,7 +4451,7 @@ def call(vm) # nil # ~~~ # - class PutNil + class PutNil < Instruction def disasm(fmt) fmt.instruction("putnil") end @@ -4793,14 +4468,6 @@ def ==(other) other.is_a?(PutNil) end - def length - 1 - end - - def pops - 0 - end - def pushes 1 end @@ -4824,7 +4491,7 @@ def call(vm) # 5 # ~~~ # - class PutObject + class PutObject < Instruction attr_reader :object def initialize(object) @@ -4851,18 +4518,10 @@ def length 2 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(object) end @@ -4880,7 +4539,7 @@ def call(vm) # 0 # ~~~ # - class PutObjectInt2Fix0 + class PutObjectInt2Fix0 < Instruction def disasm(fmt) fmt.instruction("putobject_INT2FIX_0_") end @@ -4897,14 +4556,6 @@ def ==(other) other.is_a?(PutObjectInt2Fix0) end - def length - 1 - end - - def pops - 0 - end - def pushes 1 end @@ -4930,7 +4581,7 @@ def call(vm) # 1 # ~~~ # - class PutObjectInt2Fix1 + class PutObjectInt2Fix1 < Instruction def disasm(fmt) fmt.instruction("putobject_INT2FIX_1_") end @@ -4947,14 +4598,6 @@ def ==(other) other.is_a?(PutObjectInt2Fix1) end - def length - 1 - end - - def pops - 0 - end - def pushes 1 end @@ -4978,7 +4621,7 @@ def call(vm) # puts "Hello, world!" 
# ~~~ # - class PutSelf + class PutSelf < Instruction def disasm(fmt) fmt.instruction("putself") end @@ -4995,22 +4638,10 @@ def ==(other) other.is_a?(PutSelf) end - def length - 1 - end - - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.frame._self) end @@ -5028,7 +4659,7 @@ def call(vm) # alias foo bar # ~~~ # - class PutSpecialObject + class PutSpecialObject < Instruction OBJECT_VMCORE = 1 OBJECT_CBASE = 2 OBJECT_CONST_BASE = 3 @@ -5059,18 +4690,10 @@ def length 2 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) case object when OBJECT_VMCORE @@ -5095,7 +4718,7 @@ def call(vm) # "foo" # ~~~ # - class PutString + class PutString < Instruction attr_reader :object def initialize(object) @@ -5122,18 +4745,10 @@ def length 2 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(object.dup) end @@ -5152,7 +4767,7 @@ def call(vm) # "hello".tap { |i| p i } # ~~~ # - class Send + class Send < Instruction attr_reader :calldata, :block_iseq def initialize(calldata, block_iseq) @@ -5194,10 +4809,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) block = if (iseq = block_iseq) @@ -5240,7 +4851,7 @@ def call(vm) # end # ~~~ # - class SetBlockParam + class SetBlockParam < Instruction attr_reader :index, :level def initialize(index, level) @@ -5275,14 +4886,6 @@ def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) vm.local_set(index, level, vm.pop) end @@ -5301,7 +4904,7 @@ def call(vm) # @@class_variable = 1 # ~~~ # - class SetClassVariable + class SetClassVariable < Instruction attr_reader :name, :cache def initialize(name, cache) @@ -5337,14 +4940,6 @@ def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) clazz = vm.frame._self clazz = clazz.class unless clazz.is_a?(Class) @@ -5363,7 +4958,7 @@ def call(vm) # Constant = 1 # ~~~ # - class SetConstant + class SetConstant < 
Instruction attr_reader :name def initialize(name) @@ -5394,14 +4989,6 @@ def pops 2 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) value, parent = vm.pop(2) parent.const_set(name, value) @@ -5419,7 +5006,7 @@ def call(vm) # $global = 5 # ~~~ # - class SetGlobal + class SetGlobal < Instruction attr_reader :name def initialize(name) @@ -5450,14 +5037,6 @@ def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) # Evaluating the name of the global variable because there isn't a # reflection API for global variables. @@ -5481,7 +5060,7 @@ def call(vm) # @instance_variable = 1 # ~~~ # - class SetInstanceVariable + class SetInstanceVariable < Instruction attr_reader :name, :cache def initialize(name, cache) @@ -5517,14 +5096,6 @@ def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) method = Object.instance_method(:instance_variable_set) method.bind(vm.frame._self).call(name, vm.pop) @@ -5545,7 +5116,7 @@ def call(vm) # tap { tap { value = 10 } } # ~~~ # - class SetLocal + class SetLocal < Instruction attr_reader :index, :level def initialize(index, level) @@ -5579,14 +5150,6 @@ def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) vm.local_set(index, level, vm.pop) end @@ -5605,7 +5168,7 @@ def call(vm) # value = 5 # ~~~ # - class SetLocalWC0 + class SetLocalWC0 < Instruction attr_reader :index def initialize(index) @@ -5636,10 +5199,6 @@ def pops 1 end - def pushes - 0 - end - def canonical SetLocal.new(index, 0) end @@ -5662,7 +5221,7 @@ def call(vm) # self.then { value = 10 } # ~~~ # - class SetLocalWC1 + class SetLocalWC1 < Instruction attr_reader :index def initialize(index) @@ -5693,10 +5252,6 @@ def pops 1 end - def pushes - 0 - end - def canonical SetLocal.new(index, 1) end @@ -5717,7 +5272,7 @@ def call(vm) # {}[:key] = 'val' # ~~~ # - class SetN + class SetN < Instruction attr_reader :number def initialize(number) @@ -5752,10 +5307,6 @@ def pushes 
1 end - def canonical - self - end - def call(vm) vm.stack[-number - 1] = vm.stack.last end @@ -5773,7 +5324,7 @@ def call(vm) # baz if (foo == 1) .. (bar == 1) # ~~~ # - class SetSpecial + class SetSpecial < Instruction attr_reader :key def initialize(key) @@ -5804,14 +5355,6 @@ def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) case key when GetSpecial::SVAR_LASTLINE @@ -5836,7 +5379,7 @@ def call(vm) # x = *(5) # ~~~ # - class SplatArray + class SplatArray < Instruction attr_reader :flag def initialize(flag) @@ -5871,10 +5414,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) value = vm.pop @@ -5914,7 +5453,7 @@ def call(vm) # !!defined?([[]]) # ~~~ # - class Swap + class Swap < Instruction def disasm(fmt) fmt.instruction("swap") end @@ -5931,10 +5470,6 @@ def ==(other) other.is_a?(Swap) end - def length - 1 - end - def pops 2 end @@ -5943,10 +5478,6 @@ def pushes 2 end - def canonical - self - end - def call(vm) left, right = vm.pop(2) vm.push(right, left) @@ -5965,7 +5496,7 @@ def call(vm) # [1, 2, 3].map { break 2 } # ~~~ # - class Throw + class Throw < Instruction RUBY_TAG_NONE = 0x0 RUBY_TAG_RETURN = 0x1 RUBY_TAG_BREAK = 0x2 @@ -6013,10 +5544,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) state = type & VM_THROW_STATE_MASK value = vm.pop @@ -6072,7 +5599,7 @@ def error_backtrace(vm) # end # ~~~ # - class TopN + class TopN < Instruction attr_reader :number def initialize(number) @@ -6099,18 +5626,10 @@ def length 2 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.stack[-number - 1]) end @@ -6127,7 +5646,7 @@ def call(vm) # /foo #{bar}/ # ~~~ # - class ToRegExp + class ToRegExp < Instruction attr_reader :options, :length def initialize(options, length) @@ -6160,10 +5679,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(Regexp.new(vm.pop(length).join, options)) end diff --git a/lib/syntax_tree/yarv/legacy.rb 
b/lib/syntax_tree/yarv/legacy.rb index ab9b00df..e20729d9 100644 --- a/lib/syntax_tree/yarv/legacy.rb +++ b/lib/syntax_tree/yarv/legacy.rb @@ -19,7 +19,7 @@ module Legacy # @@class_variable # ~~~ # - class GetClassVariable + class GetClassVariable < Instruction attr_reader :name def initialize(name) @@ -46,10 +46,6 @@ def length 2 end - def pops - 0 - end - def pushes 1 end @@ -79,7 +75,7 @@ def call(vm) # Constant # ~~~ # - class OptGetInlineCache + class OptGetInlineCache < Instruction attr_reader :label, :cache def initialize(label, cache) @@ -111,21 +107,21 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(nil) end + + def branch_targets + [label] + end + + def falls_through? + true + end end # ### Summary @@ -143,7 +139,7 @@ def call(vm) # Constant # ~~~ # - class OptSetInlineCache + class OptSetInlineCache < Instruction attr_reader :cache def initialize(cache) @@ -178,10 +174,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) end end @@ -200,7 +192,7 @@ def call(vm) # @@class_variable = 1 # ~~~ # - class SetClassVariable + class SetClassVariable < Instruction attr_reader :name def initialize(name) @@ -231,10 +223,6 @@ def pops 1 end - def pushes - 0 - end - def canonical YARV::SetClassVariable.new(name, nil) end diff --git a/test/yarv_test.rb b/test/yarv_test.rb index e3995435..5ac37504 100644 --- a/test/yarv_test.rb +++ b/test/yarv_test.rb @@ -288,41 +288,84 @@ def value end end - instructions = - YARV.constants.map { YARV.const_get(_1) } + - YARV::Legacy.constants.map { YARV::Legacy.const_get(_1) } - - [ - YARV::Assembler, - YARV::Bf, - YARV::CallData, - YARV::Compiler, - YARV::Decompiler, - YARV::Disassembler, - YARV::InstructionSequence, - YARV::Legacy, - YARV::LocalTable, - YARV::VM - ] + ObjectSpace.each_object(YARV::Instruction.singleton_class) do |instruction| + next if instruction == YARV::Instruction - interface = %i[ - disasm - to_a - deconstruct_keys - length - pops - 
pushes - canonical - call - == - ] - - instructions.each do |instruction| define_method("test_instruction_interface_#{instruction.name}") do - instance_methods = instruction.instance_methods(false) - assert_empty(interface - instance_methods) + methods = instruction.instance_methods(false) + assert_empty(%i[disasm to_a deconstruct_keys call ==] - methods) end end + def test_cfg + iseq = RubyVM::InstructionSequence.compile("100 + (14 < 0 ? -1 : +1)") + iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) + cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) + + assert_equal(<<~CFG, cfg.disasm) + == cfg: #<ISeq:<compiled>@<compiled>:1 (1,0)-(1,0)> + block_0 + 0000 putobject 100 + 0002 putobject 14 + 0004 putobject_INT2FIX_0_ + 0005 opt_lt <calldata!mid:<, argc:1, ARGS_SIMPLE> + 0007 branchunless 13 + == to: block_13, block_9 + block_9 + == from: block_0 + 0009 putobject -1 + 0011 jump 14 + == to: block_14 + block_13 + == from: block_0 + 0013 putobject_INT2FIX_1_ + == to: block_14 + block_14 + == from: block_9, block_13 + 0014 opt_plus <calldata!mid:+, argc:1, ARGS_SIMPLE> + 0016 leave + == to: leaves + CFG + end + + def test_dfg + iseq = RubyVM::InstructionSequence.compile("100 + (14 < 0 ?
-1 : +1)") + iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) + cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) + dfg = SyntaxTree::YARV::DataFlowGraph.compile(cfg) + + assert_equal(<<~DFG, dfg.disasm) + == dfg: #<ISeq:<compiled>@<compiled>:1 (1,0)-(1,0)> + block_0 + 0000 putobject 100 # out: out_0 + 0002 putobject 14 # out: 5 + 0004 putobject_INT2FIX_0_ # out: 5 + 0005 opt_lt <calldata!mid:<, argc:1, ARGS_SIMPLE> # in: 2, 4; out: 7 + 0007 branchunless 13 # in: 5 + == to: block_13, block_9 + == out: 0 + block_9 + == from: block_0 + == in: pass_0 + 0009 putobject -1 # out: out_0 + 0011 jump 14 + == to: block_14 + == out: pass_0, 9 + block_13 + == from: block_0 + == in: pass_0 + 0013 putobject_INT2FIX_1_ # out: out_0 + == to: block_14 + == out: pass_0, 13 + block_14 + == from: block_9, block_13 + == in: in_0, in_1 + 0014 opt_plus <calldata!mid:+, argc:1, ARGS_SIMPLE> # in: in_0, in_1; out: 16 + 0016 leave # in: 14 + == to: leaves + DFG + end + private def assert_decompiles(expected, source)