From 0f11b7e1d1afe7f3c9b284d5b140fed15ecf2a72 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Feb 2023 13:31:56 -0500 Subject: [PATCH 01/14] Add query methods for instructions for branching logic --- lib/syntax_tree/yarv/instructions.rb | 774 ++++++--------------------- lib/syntax_tree/yarv/legacy.rb | 36 +- test/yarv_test.rb | 34 +- 3 files changed, 193 insertions(+), 651 deletions(-) diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index bba06f8d..c387e763 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -63,6 +63,50 @@ def self.calldata( CallData.new(method, argc, flags, kw_arg) end + # This is a base class for all YARV instructions. It provides a few + # convenience methods for working with instructions. + class Instruction + # This method creates an instruction that represents the canonical + # (non-specialized) form of this instruction. If this instruction is not + # a specialized instruction, then this method returns `self`. + def canonical + self + end + + # This returns the size of the instruction in terms of the number of slots + # it occupies in the instruction sequence. Effectively this is 1 plus the + # number of operands. + def length + 1 + end + + # This returns the number of values that are pushed onto the stack. + def pushes + 0 + end + + # This returns the number of values that are popped off the stack. + def pops + 0 + end + + # Whether or not this instruction is a branch instruction. + def branches? + false + end + + # Whether or not this instruction leaves the current frame. + def leaves? + false + end + + # Whether or not this instruction falls through to the next instruction if + # its branching fails. + def falls_through? 
+ false + end + end + # ### Summary # # `adjuststack` accepts a single integer argument and removes that many @@ -76,7 +120,7 @@ def self.calldata( # x[0] # ~~~ # - class AdjustStack + class AdjustStack < Instruction attr_reader :number def initialize(number) @@ -107,14 +151,6 @@ def pops number end - def pushes - 0 - end - - def canonical - self - end - def call(vm) vm.pop(number) end @@ -138,7 +174,7 @@ def call(vm) # "#{5}" # ~~~ # - class AnyToString + class AnyToString < Instruction def disasm(fmt) fmt.instruction("anytostring") end @@ -155,10 +191,6 @@ def ==(other) other.is_a?(AnyToString) end - def length - 1 - end - def pops 2 end @@ -167,10 +199,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) original, value = vm.pop(2) @@ -198,7 +226,7 @@ def call(vm) # puts x # ~~~ # - class BranchIf + class BranchIf < Instruction attr_reader :label def initialize(label) @@ -229,16 +257,16 @@ def pops 1 end - def pushes - 0 + def call(vm) + vm.jump(label) if vm.pop end - def canonical - self + def branches? + true end - def call(vm) - vm.jump(label) if vm.pop + def falls_through? + true end end @@ -259,7 +287,7 @@ def call(vm) # end # ~~~ # - class BranchNil + class BranchNil < Instruction attr_reader :label def initialize(label) @@ -290,16 +318,16 @@ def pops 1 end - def pushes - 0 + def call(vm) + vm.jump(label) if vm.pop.nil? end - def canonical - self + def branches? + true end - def call(vm) - vm.jump(label) if vm.pop.nil? + def falls_through? + true end end @@ -319,7 +347,7 @@ def call(vm) # end # ~~~ # - class BranchUnless + class BranchUnless < Instruction attr_reader :label def initialize(label) @@ -350,16 +378,16 @@ def pops 1 end - def pushes - 0 + def call(vm) + vm.jump(label) unless vm.pop end - def canonical - self + def branches? + true end - def call(vm) - vm.jump(label) unless vm.pop + def falls_through? 
+ true end end @@ -382,7 +410,7 @@ def call(vm) # evaluate(value: 3) # ~~~ # - class CheckKeyword + class CheckKeyword < Instruction attr_reader :keyword_bits_index, :keyword_index def initialize(keyword_bits_index, keyword_index) @@ -419,18 +447,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.local_get(keyword_bits_index, 0)[keyword_index]) end @@ -448,7 +468,7 @@ def call(vm) # foo in Foo # ~~~ # - class CheckMatch + class CheckMatch < Instruction VM_CHECKMATCH_TYPE_WHEN = 1 VM_CHECKMATCH_TYPE_CASE = 2 VM_CHECKMATCH_TYPE_RESCUE = 3 @@ -489,10 +509,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) target, pattern = vm.pop(2) @@ -536,7 +552,7 @@ def check?(pattern, target) # foo in [bar] # ~~~ # - class CheckType + class CheckType < Instruction TYPE_OBJECT = 0x01 TYPE_CLASS = 0x02 TYPE_MODULE = 0x03 @@ -643,10 +659,6 @@ def pushes 2 end - def canonical - self - end - def call(vm) object = vm.pop result = @@ -713,7 +725,7 @@ def call(vm) # [1, *2] # ~~~ # - class ConcatArray + class ConcatArray < Instruction def disasm(fmt) fmt.instruction("concatarray") end @@ -730,10 +742,6 @@ def ==(other) other.is_a?(ConcatArray) end - def length - 1 - end - def pops 2 end @@ -742,10 +750,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) left, right = vm.pop(2) vm.push([*left, *right]) @@ -767,7 +771,7 @@ def call(vm) # "#{5}" # ~~~ # - class ConcatStrings + class ConcatStrings < Instruction attr_reader :number def initialize(number) @@ -802,10 +806,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop(number).join) end @@ -826,7 +826,7 @@ def call(vm) # end # ~~~ # - class DefineClass + class DefineClass < Instruction TYPE_CLASS = 0 TYPE_SINGLETON_CLASS = 1 TYPE_MODULE = 2 @@ -874,10 +874,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) object, superclass = vm.pop(2) @@ -914,7 +910,7 @@ def call(vm) # defined?(x) # ~~~ # - class 
Defined + class Defined < Instruction TYPE_NIL = 1 TYPE_IVAR = 2 TYPE_LVAR = 3 @@ -1011,10 +1007,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) object = vm.pop @@ -1069,7 +1061,7 @@ def call(vm) # def value = "value" # ~~~ # - class DefineMethod + class DefineMethod < Instruction attr_reader :method_name, :method_iseq def initialize(method_name, method_iseq) @@ -1102,18 +1094,6 @@ def length 3 end - def pops - 0 - end - - def pushes - 0 - end - - def canonical - self - end - def call(vm) name = method_name nesting = vm.frame.nesting @@ -1150,7 +1130,7 @@ def call(vm) # def self.value = "value" # ~~~ # - class DefineSMethod + class DefineSMethod < Instruction attr_reader :method_name, :method_iseq def initialize(method_name, method_iseq) @@ -1187,14 +1167,6 @@ def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) name = method_name nesting = vm.frame.nesting @@ -1227,7 +1199,7 @@ def call(vm) # $global = 5 # ~~~ # - class Dup + class Dup < Instruction def disasm(fmt) fmt.instruction("dup") end @@ -1244,10 +1216,6 @@ def ==(other) other.is_a?(Dup) end - def length - 1 - end - def pops 1 end @@ -1256,10 +1224,6 @@ def pushes 2 end - def canonical - self - end - def call(vm) vm.push(vm.stack.last.dup) end @@ -1275,7 +1239,7 @@ def call(vm) # [true] # ~~~ # - class DupArray + class DupArray < Instruction attr_reader :object def initialize(object) @@ -1302,18 +1266,10 @@ def length 2 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(object.dup) end @@ -1329,7 +1285,7 @@ def call(vm) # { a: 1 } # ~~~ # - class DupHash + class DupHash < Instruction attr_reader :object def initialize(object) @@ -1356,18 +1312,10 @@ def length 2 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(object.dup) end @@ -1383,7 +1331,7 @@ def call(vm) # Object::X ||= true # ~~~ # - class DupN + class DupN < Instruction attr_reader :number def 
initialize(number) @@ -1410,18 +1358,10 @@ def length 2 end - def pops - 0 - end - def pushes number end - def canonical - self - end - def call(vm) values = vm.pop(number) vm.push(*values) @@ -1441,7 +1381,7 @@ def call(vm) # x, = [true, false, nil] # ~~~ # - class ExpandArray + class ExpandArray < Instruction attr_reader :number, :flags def initialize(number, flags) @@ -1478,10 +1418,6 @@ def pushes number end - def canonical - self - end - def call(vm) object = vm.pop object = @@ -1539,7 +1475,7 @@ def call(vm) # end # ~~~ # - class GetBlockParam + class GetBlockParam < Instruction attr_reader :index, :level def initialize(index, level) @@ -1570,18 +1506,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.local_get(index, level)) end @@ -1602,7 +1530,7 @@ def call(vm) # end # ~~~ # - class GetBlockParamProxy + class GetBlockParamProxy < Instruction attr_reader :index, :level def initialize(index, level) @@ -1636,18 +1564,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.local_get(index, level)) end @@ -1665,7 +1585,7 @@ def call(vm) # @@class_variable # ~~~ # - class GetClassVariable + class GetClassVariable < Instruction attr_reader :name, :cache def initialize(name, cache) @@ -1697,18 +1617,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) clazz = vm.frame._self clazz = clazz.class unless clazz.is_a?(Class) @@ -1728,7 +1640,7 @@ def call(vm) # Constant # ~~~ # - class GetConstant + class GetConstant < Instruction attr_reader :name def initialize(name) @@ -1763,10 +1675,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) const_base, allow_nil = vm.pop(2) @@ -1798,7 +1706,7 @@ def call(vm) # $$ # ~~~ # - class GetGlobal + class GetGlobal < Instruction attr_reader :name def initialize(name) @@ -1825,18 +1733,10 @@ def length 2 end - def pops - 0 - end - def 
pushes 1 end - def canonical - self - end - def call(vm) # Evaluating the name of the global variable because there isn't a # reflection API for global variables. @@ -1861,7 +1761,7 @@ def call(vm) # @instance_variable # ~~~ # - class GetInstanceVariable + class GetInstanceVariable < Instruction attr_reader :name, :cache def initialize(name, cache) @@ -1893,18 +1793,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) method = Object.instance_method(:instance_variable_get) vm.push(method.bind(vm.frame._self).call(name)) @@ -1925,7 +1817,7 @@ def call(vm) # tap { tap { value } } # ~~~ # - class GetLocal + class GetLocal < Instruction attr_reader :index, :level def initialize(index, level) @@ -1955,18 +1847,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.local_get(index, level)) end @@ -1985,7 +1869,7 @@ def call(vm) # value # ~~~ # - class GetLocalWC0 + class GetLocalWC0 < Instruction attr_reader :index def initialize(index) @@ -2012,10 +1896,6 @@ def length 2 end - def pops - 0 - end - def pushes 1 end @@ -2042,7 +1922,7 @@ def call(vm) # self.then { value } # ~~~ # - class GetLocalWC1 + class GetLocalWC1 < Instruction attr_reader :index def initialize(index) @@ -2069,10 +1949,6 @@ def length 2 end - def pops - 0 - end - def pushes 1 end @@ -2096,7 +1972,7 @@ def call(vm) # 1 if (a == 1) .. 
(b == 2) # ~~~ # - class GetSpecial + class GetSpecial < Instruction SVAR_LASTLINE = 0 # $_ SVAR_BACKREF = 1 # $~ SVAR_FLIPFLOP_START = 2 # flipflop @@ -2128,18 +2004,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) case key when SVAR_LASTLINE @@ -2163,7 +2031,7 @@ def call(vm) # :"#{"foo"}" # ~~~ # - class Intern + class Intern < Instruction def disasm(fmt) fmt.instruction("intern") end @@ -2180,10 +2048,6 @@ def ==(other) other.is_a?(Intern) end - def length - 1 - end - def pops 1 end @@ -2192,10 +2056,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop.to_sym) end @@ -2215,7 +2075,7 @@ def call(vm) # end # ~~~ # - class InvokeBlock + class InvokeBlock < Instruction attr_reader :calldata def initialize(calldata) @@ -2250,10 +2110,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.frame_yield.block.call(*vm.pop(calldata.argc))) end @@ -2273,7 +2129,7 @@ def call(vm) # end # ~~~ # - class InvokeSuper + class InvokeSuper < Instruction attr_reader :calldata, :block_iseq def initialize(calldata, block_iseq) @@ -2302,10 +2158,6 @@ def ==(other) other.block_iseq == block_iseq end - def length - 1 - end - def pops argb = (calldata.flag?(CallData::CALL_ARGS_BLOCKARG) ? 1 : 0) argb + calldata.argc + 1 @@ -2315,10 +2167,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) block = if (iseq = block_iseq) @@ -2358,7 +2206,7 @@ def call(vm) # end # ~~~ # - class Jump + class Jump < Instruction attr_reader :label def initialize(label) @@ -2385,21 +2233,13 @@ def length 2 end - def pops - 0 - end - - def pushes - 0 - end - - def canonical - self - end - def call(vm) vm.jump(label) end + + def branches? 
+ true + end end # ### Summary @@ -2412,7 +2252,7 @@ def call(vm) # ;; # ~~~ # - class Leave + class Leave < Instruction def disasm(fmt) fmt.instruction("leave") end @@ -2429,10 +2269,6 @@ def ==(other) other.is_a?(Leave) end - def length - 1 - end - def pops 1 end @@ -2443,13 +2279,17 @@ def pushes 0 end - def canonical - self - end - def call(vm) vm.leave end + + def branches? + true + end + + def leaves? + true + end end # ### Summary @@ -2464,7 +2304,7 @@ def call(vm) # ["string"] # ~~~ # - class NewArray + class NewArray < Instruction attr_reader :number def initialize(number) @@ -2499,10 +2339,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop(number)) end @@ -2520,7 +2356,7 @@ def call(vm) # ["string", **{ foo: "bar" }] # ~~~ # - class NewArrayKwSplat + class NewArrayKwSplat < Instruction attr_reader :number def initialize(number) @@ -2555,10 +2391,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop(number)) end @@ -2578,7 +2410,7 @@ def call(vm) # end # ~~~ # - class NewHash + class NewHash < Instruction attr_reader :number def initialize(number) @@ -2613,10 +2445,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop(number).each_slice(2).to_h) end @@ -2637,7 +2465,7 @@ def call(vm) # p (x..y), (x...y) # ~~~ # - class NewRange + class NewRange < Instruction attr_reader :exclude_end def initialize(exclude_end) @@ -2672,10 +2500,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(Range.new(*vm.pop(2), exclude_end == 1)) end @@ -2692,7 +2516,7 @@ def call(vm) # raise rescue true # ~~~ # - class Nop + class Nop < Instruction def disasm(fmt) fmt.instruction("nop") end @@ -2709,22 +2533,6 @@ def ==(other) other.is_a?(Nop) end - def length - 1 - end - - def pops - 0 - end - - def pushes - 0 - end - - def canonical - self - end - def call(vm) end end @@ -2743,7 +2551,7 @@ def call(vm) # "#{5}" # ~~~ # - class ObjToString + class ObjToString < Instruction 
attr_reader :calldata def initialize(calldata) @@ -2778,10 +2586,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop.to_s) end @@ -2800,7 +2604,7 @@ def call(vm) # END { puts "END" } # ~~~ # - class Once + class Once < Instruction attr_reader :iseq, :cache def initialize(iseq, cache) @@ -2829,18 +2633,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) return if @executed vm.push(vm.run_block_frame(iseq, vm.frame)) @@ -2861,7 +2657,7 @@ def call(vm) # 2 & 3 # ~~~ # - class OptAnd + class OptAnd < Instruction attr_reader :calldata def initialize(calldata) @@ -2917,7 +2713,7 @@ def call(vm) # 7[2] # ~~~ # - class OptAref + class OptAref < Instruction attr_reader :calldata def initialize(calldata) @@ -2974,7 +2770,7 @@ def call(vm) # { 'test' => true }['test'] # ~~~ # - class OptArefWith + class OptArefWith < Instruction attr_reader :object, :calldata def initialize(object, calldata) @@ -3014,10 +2810,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop[object]) end @@ -3036,7 +2828,7 @@ def call(vm) # {}[:key] = value # ~~~ # - class OptAset + class OptAset < Instruction attr_reader :calldata def initialize(calldata) @@ -3092,7 +2884,7 @@ def call(vm) # {}["key"] = value # ~~~ # - class OptAsetWith + class OptAsetWith < Instruction attr_reader :object, :calldata def initialize(object, calldata) @@ -3132,10 +2924,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) hash, value = vm.pop(2) vm.push(hash[object] = value) @@ -3165,7 +2953,7 @@ def call(vm) # end # ~~~ # - class OptCaseDispatch + class OptCaseDispatch < Instruction attr_reader :case_dispatch_hash, :else_label def initialize(case_dispatch_hash, else_label) @@ -3206,16 +2994,16 @@ def pops 1 end - def pushes - 0 + def call(vm) + vm.jump(case_dispatch_hash.fetch(vm.pop, else_label)) end - def canonical - self + def branches? 
+ true end - def call(vm) - vm.jump(case_dispatch_hash.fetch(vm.pop, else_label)) + def falls_through? + true end end @@ -3232,7 +3020,7 @@ def call(vm) # 2 / 3 # ~~~ # - class OptDiv + class OptDiv < Instruction attr_reader :calldata def initialize(calldata) @@ -3288,7 +3076,7 @@ def call(vm) # "".empty? # ~~~ # - class OptEmptyP + class OptEmptyP < Instruction attr_reader :calldata def initialize(calldata) @@ -3345,7 +3133,7 @@ def call(vm) # 2 == 2 # ~~~ # - class OptEq + class OptEq < Instruction attr_reader :calldata def initialize(calldata) @@ -3402,7 +3190,7 @@ def call(vm) # 4 >= 3 # ~~~ # - class OptGE + class OptGE < Instruction attr_reader :calldata def initialize(calldata) @@ -3458,7 +3246,7 @@ def call(vm) # ::Object # ~~~ # - class OptGetConstantPath + class OptGetConstantPath < Instruction attr_reader :names def initialize(names) @@ -3486,18 +3274,10 @@ def length 2 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) current = vm.frame._self current = current.class unless current.is_a?(Class) @@ -3523,7 +3303,7 @@ def call(vm) # 4 > 3 # ~~~ # - class OptGT + class OptGT < Instruction attr_reader :calldata def initialize(calldata) @@ -3580,7 +3360,7 @@ def call(vm) # 3 <= 4 # ~~~ # - class OptLE + class OptLE < Instruction attr_reader :calldata def initialize(calldata) @@ -3637,7 +3417,7 @@ def call(vm) # "".length # ~~~ # - class OptLength + class OptLength < Instruction attr_reader :calldata def initialize(calldata) @@ -3694,7 +3474,7 @@ def call(vm) # 3 < 4 # ~~~ # - class OptLT + class OptLT < Instruction attr_reader :calldata def initialize(calldata) @@ -3751,7 +3531,7 @@ def call(vm) # "" << 2 # ~~~ # - class OptLTLT + class OptLTLT < Instruction attr_reader :calldata def initialize(calldata) @@ -3809,7 +3589,7 @@ def call(vm) # 3 - 2 # ~~~ # - class OptMinus + class OptMinus < Instruction attr_reader :calldata def initialize(calldata) @@ -3866,7 +3646,7 @@ def call(vm) # 4 % 2 # ~~~ # - class OptMod + class 
OptMod < Instruction attr_reader :calldata def initialize(calldata) @@ -3923,7 +3703,7 @@ def call(vm) # 3 * 2 # ~~~ # - class OptMult + class OptMult < Instruction attr_reader :calldata def initialize(calldata) @@ -3982,7 +3762,7 @@ def call(vm) # 2 != 2 # ~~~ # - class OptNEq + class OptNEq < Instruction attr_reader :eq_calldata, :neq_calldata def initialize(eq_calldata, neq_calldata) @@ -4022,10 +3802,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) receiver, argument = vm.pop(2) vm.push(receiver != argument) @@ -4044,7 +3820,7 @@ def call(vm) # [a, b, c].max # ~~~ # - class OptNewArrayMax + class OptNewArrayMax < Instruction attr_reader :number def initialize(number) @@ -4079,10 +3855,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop(number).max) end @@ -4100,7 +3872,7 @@ def call(vm) # [a, b, c].min # ~~~ # - class OptNewArrayMin + class OptNewArrayMin < Instruction attr_reader :number def initialize(number) @@ -4135,10 +3907,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.pop(number).min) end @@ -4157,7 +3925,7 @@ def call(vm) # "".nil? # ~~~ # - class OptNilP + class OptNilP < Instruction attr_reader :calldata def initialize(calldata) @@ -4212,7 +3980,7 @@ def call(vm) # !true # ~~~ # - class OptNot + class OptNot < Instruction attr_reader :calldata def initialize(calldata) @@ -4269,7 +4037,7 @@ def call(vm) # 2 | 3 # ~~~ # - class OptOr + class OptOr < Instruction attr_reader :calldata def initialize(calldata) @@ -4326,7 +4094,7 @@ def call(vm) # 2 + 3 # ~~~ # - class OptPlus + class OptPlus < Instruction attr_reader :calldata def initialize(calldata) @@ -4382,7 +4150,7 @@ def call(vm) # /a/ =~ "a" # ~~~ # - class OptRegExpMatch2 + class OptRegExpMatch2 < Instruction attr_reader :calldata def initialize(calldata) @@ -4438,7 +4206,7 @@ def call(vm) # puts "Hello, world!" 
# ~~~ # - class OptSendWithoutBlock + class OptSendWithoutBlock < Instruction attr_reader :calldata def initialize(calldata) @@ -4495,7 +4263,7 @@ def call(vm) # "".size # ~~~ # - class OptSize + class OptSize < Instruction attr_reader :calldata def initialize(calldata) @@ -4551,7 +4319,7 @@ def call(vm) # "hello".freeze # ~~~ # - class OptStrFreeze + class OptStrFreeze < Instruction attr_reader :object, :calldata def initialize(object, calldata) @@ -4583,18 +4351,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(object.freeze) end @@ -4612,7 +4372,7 @@ def call(vm) # -"string" # ~~~ # - class OptStrUMinus + class OptStrUMinus < Instruction attr_reader :object, :calldata def initialize(object, calldata) @@ -4644,18 +4404,10 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(-object) end @@ -4674,7 +4426,7 @@ def call(vm) # "".succ # ~~~ # - class OptSucc + class OptSucc < Instruction attr_reader :calldata def initialize(calldata) @@ -4728,7 +4480,7 @@ def call(vm) # a ||= 2 # ~~~ # - class Pop + class Pop < Instruction def disasm(fmt) fmt.instruction("pop") end @@ -4745,22 +4497,10 @@ def ==(other) other.is_a?(Pop) end - def length - 1 - end - def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) vm.pop end @@ -4776,7 +4516,7 @@ def call(vm) # nil # ~~~ # - class PutNil + class PutNil < Instruction def disasm(fmt) fmt.instruction("putnil") end @@ -4793,14 +4533,6 @@ def ==(other) other.is_a?(PutNil) end - def length - 1 - end - - def pops - 0 - end - def pushes 1 end @@ -4824,7 +4556,7 @@ def call(vm) # 5 # ~~~ # - class PutObject + class PutObject < Instruction attr_reader :object def initialize(object) @@ -4851,18 +4583,10 @@ def length 2 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(object) end @@ -4880,7 +4604,7 @@ def call(vm) # 0 # ~~~ # - class 
PutObjectInt2Fix0 + class PutObjectInt2Fix0 < Instruction def disasm(fmt) fmt.instruction("putobject_INT2FIX_0_") end @@ -4897,14 +4621,6 @@ def ==(other) other.is_a?(PutObjectInt2Fix0) end - def length - 1 - end - - def pops - 0 - end - def pushes 1 end @@ -4930,7 +4646,7 @@ def call(vm) # 1 # ~~~ # - class PutObjectInt2Fix1 + class PutObjectInt2Fix1 < Instruction def disasm(fmt) fmt.instruction("putobject_INT2FIX_1_") end @@ -4947,14 +4663,6 @@ def ==(other) other.is_a?(PutObjectInt2Fix1) end - def length - 1 - end - - def pops - 0 - end - def pushes 1 end @@ -4978,7 +4686,7 @@ def call(vm) # puts "Hello, world!" # ~~~ # - class PutSelf + class PutSelf < Instruction def disasm(fmt) fmt.instruction("putself") end @@ -4995,22 +4703,10 @@ def ==(other) other.is_a?(PutSelf) end - def length - 1 - end - - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.frame._self) end @@ -5028,7 +4724,7 @@ def call(vm) # alias foo bar # ~~~ # - class PutSpecialObject + class PutSpecialObject < Instruction OBJECT_VMCORE = 1 OBJECT_CBASE = 2 OBJECT_CONST_BASE = 3 @@ -5059,18 +4755,10 @@ def length 2 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) case object when OBJECT_VMCORE @@ -5095,7 +4783,7 @@ def call(vm) # "foo" # ~~~ # - class PutString + class PutString < Instruction attr_reader :object def initialize(object) @@ -5122,18 +4810,10 @@ def length 2 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(object.dup) end @@ -5152,7 +4832,7 @@ def call(vm) # "hello".tap { |i| p i } # ~~~ # - class Send + class Send < Instruction attr_reader :calldata, :block_iseq def initialize(calldata, block_iseq) @@ -5194,10 +4874,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) block = if (iseq = block_iseq) @@ -5240,7 +4916,7 @@ def call(vm) # end # ~~~ # - class SetBlockParam + class SetBlockParam < Instruction attr_reader :index, :level def 
initialize(index, level) @@ -5275,14 +4951,6 @@ def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) vm.local_set(index, level, vm.pop) end @@ -5301,7 +4969,7 @@ def call(vm) # @@class_variable = 1 # ~~~ # - class SetClassVariable + class SetClassVariable < Instruction attr_reader :name, :cache def initialize(name, cache) @@ -5337,14 +5005,6 @@ def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) clazz = vm.frame._self clazz = clazz.class unless clazz.is_a?(Class) @@ -5363,7 +5023,7 @@ def call(vm) # Constant = 1 # ~~~ # - class SetConstant + class SetConstant < Instruction attr_reader :name def initialize(name) @@ -5394,14 +5054,6 @@ def pops 2 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) value, parent = vm.pop(2) parent.const_set(name, value) @@ -5419,7 +5071,7 @@ def call(vm) # $global = 5 # ~~~ # - class SetGlobal + class SetGlobal < Instruction attr_reader :name def initialize(name) @@ -5450,14 +5102,6 @@ def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) # Evaluating the name of the global variable because there isn't a # reflection API for global variables. 
@@ -5481,7 +5125,7 @@ def call(vm) # @instance_variable = 1 # ~~~ # - class SetInstanceVariable + class SetInstanceVariable < Instruction attr_reader :name, :cache def initialize(name, cache) @@ -5517,14 +5161,6 @@ def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) method = Object.instance_method(:instance_variable_set) method.bind(vm.frame._self).call(name, vm.pop) @@ -5545,7 +5181,7 @@ def call(vm) # tap { tap { value = 10 } } # ~~~ # - class SetLocal + class SetLocal < Instruction attr_reader :index, :level def initialize(index, level) @@ -5579,14 +5215,6 @@ def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) vm.local_set(index, level, vm.pop) end @@ -5605,7 +5233,7 @@ def call(vm) # value = 5 # ~~~ # - class SetLocalWC0 + class SetLocalWC0 < Instruction attr_reader :index def initialize(index) @@ -5636,10 +5264,6 @@ def pops 1 end - def pushes - 0 - end - def canonical SetLocal.new(index, 0) end @@ -5662,7 +5286,7 @@ def call(vm) # self.then { value = 10 } # ~~~ # - class SetLocalWC1 + class SetLocalWC1 < Instruction attr_reader :index def initialize(index) @@ -5693,10 +5317,6 @@ def pops 1 end - def pushes - 0 - end - def canonical SetLocal.new(index, 1) end @@ -5717,7 +5337,7 @@ def call(vm) # {}[:key] = 'val' # ~~~ # - class SetN + class SetN < Instruction attr_reader :number def initialize(number) @@ -5752,10 +5372,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.stack[-number - 1] = vm.stack.last end @@ -5773,7 +5389,7 @@ def call(vm) # baz if (foo == 1) .. 
(bar == 1) # ~~~ # - class SetSpecial + class SetSpecial < Instruction attr_reader :key def initialize(key) @@ -5804,14 +5420,6 @@ def pops 1 end - def pushes - 0 - end - - def canonical - self - end - def call(vm) case key when GetSpecial::SVAR_LASTLINE @@ -5836,7 +5444,7 @@ def call(vm) # x = *(5) # ~~~ # - class SplatArray + class SplatArray < Instruction attr_reader :flag def initialize(flag) @@ -5871,10 +5479,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) value = vm.pop @@ -5914,7 +5518,7 @@ def call(vm) # !!defined?([[]]) # ~~~ # - class Swap + class Swap < Instruction def disasm(fmt) fmt.instruction("swap") end @@ -5931,10 +5535,6 @@ def ==(other) other.is_a?(Swap) end - def length - 1 - end - def pops 2 end @@ -5943,10 +5543,6 @@ def pushes 2 end - def canonical - self - end - def call(vm) left, right = vm.pop(2) vm.push(right, left) @@ -5965,7 +5561,7 @@ def call(vm) # [1, 2, 3].map { break 2 } # ~~~ # - class Throw + class Throw < Instruction RUBY_TAG_NONE = 0x0 RUBY_TAG_RETURN = 0x1 RUBY_TAG_BREAK = 0x2 @@ -6013,10 +5609,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) state = type & VM_THROW_STATE_MASK value = vm.pop @@ -6072,7 +5664,7 @@ def error_backtrace(vm) # end # ~~~ # - class TopN + class TopN < Instruction attr_reader :number def initialize(number) @@ -6099,18 +5691,10 @@ def length 2 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(vm.stack[-number - 1]) end @@ -6127,7 +5711,7 @@ def call(vm) # /foo #{bar}/ # ~~~ # - class ToRegExp + class ToRegExp < Instruction attr_reader :options, :length def initialize(options, length) @@ -6160,10 +5744,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) vm.push(Regexp.new(vm.pop(length).join, options)) end diff --git a/lib/syntax_tree/yarv/legacy.rb b/lib/syntax_tree/yarv/legacy.rb index ab9b00df..8e12ff16 100644 --- a/lib/syntax_tree/yarv/legacy.rb +++ b/lib/syntax_tree/yarv/legacy.rb @@ -19,7 +19,7 @@ module 
Legacy # @@class_variable # ~~~ # - class GetClassVariable + class GetClassVariable < Instruction attr_reader :name def initialize(name) @@ -46,10 +46,6 @@ def length 2 end - def pops - 0 - end - def pushes 1 end @@ -79,7 +75,7 @@ def call(vm) # Constant # ~~~ # - class OptGetInlineCache + class OptGetInlineCache < Instruction attr_reader :label, :cache def initialize(label, cache) @@ -111,21 +107,21 @@ def length 3 end - def pops - 0 - end - def pushes 1 end - def canonical - self - end - def call(vm) vm.push(nil) end + + def branches? + true + end + + def falls_through? + true + end end # ### Summary @@ -143,7 +139,7 @@ def call(vm) # Constant # ~~~ # - class OptSetInlineCache + class OptSetInlineCache < Instruction attr_reader :cache def initialize(cache) @@ -178,10 +174,6 @@ def pushes 1 end - def canonical - self - end - def call(vm) end end @@ -200,7 +192,7 @@ def call(vm) # @@class_variable = 1 # ~~~ # - class SetClassVariable + class SetClassVariable < Instruction attr_reader :name def initialize(name) @@ -231,10 +223,6 @@ def pops 1 end - def pushes - 0 - end - def canonical YARV::SetClassVariable.new(name, nil) end diff --git a/test/yarv_test.rb b/test/yarv_test.rb index e3995435..c4c4c3bd 100644 --- a/test/yarv_test.rb +++ b/test/yarv_test.rb @@ -288,38 +288,12 @@ def value end end - instructions = - YARV.constants.map { YARV.const_get(_1) } + - YARV::Legacy.constants.map { YARV::Legacy.const_get(_1) } - - [ - YARV::Assembler, - YARV::Bf, - YARV::CallData, - YARV::Compiler, - YARV::Decompiler, - YARV::Disassembler, - YARV::InstructionSequence, - YARV::Legacy, - YARV::LocalTable, - YARV::VM - ] + ObjectSpace.each_object(YARV::Instruction.singleton_class) do |instruction| + next if instruction == YARV::Instruction - interface = %i[ - disasm - to_a - deconstruct_keys - length - pops - pushes - canonical - call - == - ] - - instructions.each do |instruction| define_method("test_instruction_interface_#{instruction.name}") do - instance_methods = 
instruction.instance_methods(false) - assert_empty(interface - instance_methods) + methods = instruction.instance_methods(false) + assert_empty(%i[disasm to_a deconstruct_keys call ==] - methods) end end From 33d36ed2bbd61da601cfe6b7f5e248cd405d356f Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Feb 2023 14:13:47 -0500 Subject: [PATCH 02/14] Add a control flow graph --- lib/syntax_tree.rb | 1 + lib/syntax_tree/yarv/control_flow_graph.rb | 162 +++++++++++++++++++++ 2 files changed, 163 insertions(+) create mode 100644 lib/syntax_tree/yarv/control_flow_graph.rb diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index 73add469..ea365172 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -31,6 +31,7 @@ require_relative "syntax_tree/yarv" require_relative "syntax_tree/yarv/bf" require_relative "syntax_tree/yarv/compiler" +require_relative "syntax_tree/yarv/control_flow_graph" require_relative "syntax_tree/yarv/decompiler" require_relative "syntax_tree/yarv/disassembler" require_relative "syntax_tree/yarv/instruction_sequence" diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb new file mode 100644 index 00000000..15e0a767 --- /dev/null +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -0,0 +1,162 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # Constructs a control-flow-graph of a YARV instruction sequence. We use + # conventional basic-blocks. + class ControlFlowGraph + # This object represents a single basic block, wherein all contained + # instructions do not branch except for the last one. + class BasicBlock + # This is the index into the list of instructions where this block + # starts. + attr_reader :block_start + + # This is the set of instructions that this block contains. + attr_reader :insns + + # This is an array of basic blocks that are predecessors to this block. + attr_reader :preds + + # This is an array of basic blocks that are successors to this block. 
+ attr_reader :succs + + def initialize(block_start, insns) + @block_start = block_start + @insns = insns + + @preds = [] + @succs = [] + end + + def id + "block_#{block_start}" + end + + def last + insns.last + end + end + + # This is the instruction sequence that this control flow graph + # corresponds to. + attr_reader :iseq + + # This is the list of instructions that this control flow graph contains. + # It is effectively the same as the list of instructions in the + # instruction sequence but with line numbers and events filtered out. + attr_reader :insns + + # This is the set of basic blocks that this control-flow graph contains. + attr_reader :blocks + + def initialize(iseq, insns, blocks) + @iseq = iseq + @insns = insns + @blocks = blocks + end + + def self.compile(iseq) + # First, we need to find all of the instructions that immediately follow + # labels so that when we are looking at instructions that branch we know + # where they branch to. + labels = {} + insns = [] + + iseq.insns.each do |insn| + case insn + when Instruction + insns << insn + when InstructionSequence::Label + labels[insn] = insns.length + end + end + + # Now we need to find the indices of the instructions that start a basic + # block because they're either: + # + # * the start of an instruction sequence + # * the target of a branch + # * fallen through to from a branch + # + block_starts = Set.new([0]) + + insns.each_with_index do |insn, index| + if insn.branches? + block_starts.add(labels[insn.label]) if insn.respond_to?(:label) + block_starts.add(index + 1) if insn.falls_through? + end + end + + block_starts = block_starts.to_a.sort + + # Now we can build up a set of basic blocks by iterating over the starts + # of each block. They are keyed by the index of their first instruction. + blocks = {} + block_starts.each_with_index do |block_start, block_index| + block_stop = (block_starts[(block_index + 1)..] 
+ [insns.length]).min + + blocks[block_start] = + BasicBlock.new(block_start, insns[block_start...block_stop]) + end + + # Now we need to connect the blocks by letting them know which blocks + # precede them and which blocks follow them. + blocks.each do |block_start, block| + insn = block.last + + if insn.branches? && insn.respond_to?(:label) + block.succs << blocks.fetch(labels[insn.label]) + end + + if (!insn.branches? && !insn.leaves?) || insn.falls_through? + block.succs << blocks.fetch(block_start + block.insns.length) + end + + block.succs.each { |succ| succ.preds << block } + end + + # Here we're going to verify that we set up the control flow graph + # correctly. To do so we will assert that the only instruction in any + # given block that branches is the last instruction in the block. + blocks.each_value do |block| + block.insns[0...-1].each { |insn| raise if insn.branches? } + end + + # Finally we can return a new control flow graph with the given + # instruction sequence and our set of basic blocks. + new(iseq, insns, blocks.values) + end + + def disasm + fmt = Disassembler.new + + output = StringIO.new + output.puts "== cfg #{iseq.name}" + + blocks.each do |block| + output.print(block.id) + + unless block.preds.empty? + output.print(" # from: #{block.preds.map(&:id).join(", ")}") + end + + output.puts + + block.insns.each do |insn| + output.print(" ") + output.puts(insn.disasm(fmt)) + end + + succs = block.succs.map(&:id) + succs << "leaves" if block.last.leaves? + output.print(" # to: #{succs.join(", ")}") unless succs.empty? 
+ + output.puts + end + + output.string + end + end + end +end From 7e6e4d139ccc83d8a3a9dec301fb955919ee98f9 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Feb 2023 14:53:37 -0500 Subject: [PATCH 03/14] Build a data flow graph --- lib/syntax_tree.rb | 1 + lib/syntax_tree/yarv/control_flow_graph.rb | 1 - lib/syntax_tree/yarv/data_flow_graph.rb | 214 +++++++++++++++++++++ 3 files changed, 215 insertions(+), 1 deletion(-) create mode 100644 lib/syntax_tree/yarv/data_flow_graph.rb diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index ea365172..c6f1223b 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -32,6 +32,7 @@ require_relative "syntax_tree/yarv/bf" require_relative "syntax_tree/yarv/compiler" require_relative "syntax_tree/yarv/control_flow_graph" +require_relative "syntax_tree/yarv/data_flow_graph" require_relative "syntax_tree/yarv/decompiler" require_relative "syntax_tree/yarv/disassembler" require_relative "syntax_tree/yarv/instruction_sequence" diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb index 15e0a767..26849b64 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -130,7 +130,6 @@ def self.compile(iseq) def disasm fmt = Disassembler.new - output = StringIO.new output.puts "== cfg #{iseq.name}" diff --git a/lib/syntax_tree/yarv/data_flow_graph.rb b/lib/syntax_tree/yarv/data_flow_graph.rb new file mode 100644 index 00000000..b028c521 --- /dev/null +++ b/lib/syntax_tree/yarv/data_flow_graph.rb @@ -0,0 +1,214 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # Constructs a data-flow-graph of a YARV instruction sequence, via a + # control-flow-graph. Data flow is discovered locally and then globally. The + # graph only considers data flow through the stack - local variables and + # objects are considered fully escaped in this analysis. 
+ class DataFlowGraph + # This object represents the flow of data between instructions. + class DataFlow + attr_reader :in + attr_reader :out + + def initialize + @in = [] + @out = [] + end + end + + attr_reader :cfg, :insn_flows, :block_flows + + def initialize(cfg, insn_flows, block_flows) + @cfg = cfg + @insn_flows = insn_flows + @block_flows = block_flows + end + + def self.compile(cfg) + # First, create a data structure to encode data flow between + # instructions. + insn_flows = {} + cfg.insns.each_with_index do |insn, index| + insn_flows[index] = DataFlow.new + end + + # Next, create a data structure to encode data flow between basic + # blocks. + block_flows = {} + cfg.blocks.each do |block| + block_flows[block.block_start] = DataFlow.new + end + + # Now, discover the data flow within each basic block. Using an abstract + # stack, connect from consumers of data to the producers of that data. + cfg.blocks.each do |block| + block_flow = block_flows.fetch(block.block_start) + + stack = [] + stack_initial_depth = 0 + + # Go through each instruction in the block... + block.insns.each.with_index(block.block_start) do |insn, index| + insn_flow = insn_flows[index] + + # How many values will be missing from the local stack to run this + # instruction? + missing_stack_values = insn.pops - stack.size + + # For every value the instruction pops off the stack... + insn.pops.times do + # Was the value it pops off from another basic block? + if stack.empty? + # This is a basic block argument. + name = :"in_#{missing_stack_values - 1}" + + insn_flow.in.unshift(name) + block_flow.in.unshift(name) + + stack_initial_depth += 1 + missing_stack_values -= 1 + else + # Connect this consumer to the producer of the value. + insn_flow.in.unshift(stack.pop) + end + end + + # Record on our abstract stack that this instruction pushed + # this value onto the stack. 
+ insn.pushes.times { stack << index } + end + + # Values that are left on the stack after going through all + # instructions are arguments to the basic block that we jump to. + stack.reverse_each.with_index do |producer, index| + block_flow.out << producer + insn_flows[producer].out << :"out_#{index}" + end + end + + # Go backwards and connect from producers to consumers. + cfg.insns.each_with_index do |insn, index| + # For every instruction that produced a value used in this + # instruction... + insn_flows[index].in.each do |producer| + # If it's actually another instruction and not a basic block + # argument... + if producer.is_a?(Integer) + # Record in the producing instruction that it produces a value + # used by this construction. + insn_flows[producer].out << index + end + end + end + + # Now, discover the data flow between basic blocks. + stack = [*cfg.blocks] + until stack.empty? + succ = stack.pop + succ_flow = block_flows.fetch(succ.block_start) + succ.preds.each do |pred| + pred_flow = block_flows.fetch(pred.block_start) + + # Does a predecessor block have fewer outputs than the successor + # has inputs? + if pred_flow.out.size < succ_flow.in.size + # If so then add arguments to pass data through from the + # predecessor's predecessors. + (succ_flow.in.size - pred_flow.out.size).times do |index| + name = :"pass_#{index}" + pred_flow.in.unshift(name) + pred_flow.out.unshift(name) + end + + # Since we modified the predecessor, add it back to the worklist + # so it'll be considered as a successor again, and propogate the + # global data flow back up the control flow graph. + stack << pred + end + end + end + + # Verify that we constructed the data flow graph correctly. Check that + # the first block has no arguments. + raise unless block_flows.fetch(cfg.blocks.first.block_start).in.empty? + + # Check all control flow edges between blocks pass the right number of + # arguments. 
+ cfg.blocks.each do |pred| + pred_flow = block_flows.fetch(pred.block_start) + + if pred.succs.empty? + # With no successors, there should be no output arguments. + raise unless pred_flow.out.empty? + else + # Check with successor... + pred.succs.each do |succ| + succ_flow = block_flows.fetch(succ.block_start) + + # The predecessor should have as many output arguments as the + # success has input arguments. + raise unless pred_flow.out.size == succ_flow.in.size + end + end + end + + # Finally we can return the data flow graph. + new(cfg, insn_flows, block_flows) + end + + def disasm + fmt = Disassembler.new + output = StringIO.new + output.puts "== dfg #{cfg.iseq.name}" + + cfg.blocks.each do |block| + output.print(block.id) + unless block.preds.empty? + output.print(" # from: #{block.preds.map(&:id).join(", ")}") + end + output.puts + + block_flow = block_flows.fetch(block.block_start) + unless block_flow.in.empty? + output.puts " # in: #{block_flow.in.join(", ")}" + end + + block.insns.each.with_index(block.block_start) do |insn, index| + output.print(" ") + output.print(insn.disasm(fmt)) + + insn_flow = insn_flows[index] + if insn_flow.in.empty? && insn_flow.out.empty? + output.puts + next + end + + output.print(" # ") + unless insn_flow.in.empty? + output.print("in: #{insn_flow.in.join(", ")}") + output.print("; ") unless insn_flow.out.empty? + end + + unless insn_flow.out.empty? + output.print("out: #{insn_flow.out.join(", ")}") + end + + output.puts + end + + succs = block.succs.map(&:id) + succs << "leaves" if block.last.leaves? + output.puts(" # to: #{succs.join(", ")}") unless succs.empty? + + unless block_flow.out.empty? 
+ output.puts " # out: #{block_flow.out.join(", ")}" + end + end + + output.string + end + end + end +end \ No newline at end of file From 907cf23b2e8245cd99b6839f06a2bae40b0ae393 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Feb 2023 16:38:08 -0500 Subject: [PATCH 04/14] More documentation --- lib/syntax_tree/yarv/control_flow_graph.rb | 180 +++++++++++++-------- test/yarv_test.rb | 63 ++++++++ 2 files changed, 174 insertions(+), 69 deletions(-) diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb index 26849b64..cd8a8324 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -2,12 +2,24 @@ module SyntaxTree module YARV - # Constructs a control-flow-graph of a YARV instruction sequence. We use - # conventional basic-blocks. + # This class represents a control flow graph of a YARV instruction sequence. + # It constructs a graph of basic blocks that hold subsets of the list of + # instructions from the instruction sequence. + # + # You can use this class by calling the ::compile method and passing it a + # YARV instruction sequence. It will return a control flow graph object. + # + # iseq = RubyVM::InstructionSequence.compile("1 + 2") + # iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) + # cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) + # class ControlFlowGraph # This object represents a single basic block, wherein all contained # instructions do not branch except for the last one. class BasicBlock + # This is the unique identifier for this basic block. + attr_reader :id + # This is the index into the list of instructions where this block # starts. 
attr_reader :block_start @@ -22,6 +34,8 @@ class BasicBlock attr_reader :succs def initialize(block_start, insns) + @id = "block_#{block_start}" + @block_start = block_start @insns = insns @@ -29,8 +43,11 @@ def initialize(block_start, insns) @succs = [] end - def id - "block_#{block_start}" + # This method is used to verify that the basic block is well formed. It + # checks that the only instruction in this basic block that branches is + # the last instruction. + def verify + insns[0...-1].each { |insn| raise if insn.branches? } end def last @@ -38,94 +55,108 @@ def last end end - # This is the instruction sequence that this control flow graph - # corresponds to. - attr_reader :iseq - - # This is the list of instructions that this control flow graph contains. - # It is effectively the same as the list of instructions in the - # instruction sequence but with line numbers and events filtered out. - attr_reader :insns - - # This is the set of basic blocks that this control-flow graph contains. - attr_reader :blocks - - def initialize(iseq, insns, blocks) - @iseq = iseq - @insns = insns - @blocks = blocks - end - - def self.compile(iseq) - # First, we need to find all of the instructions that immediately follow - # labels so that when we are looking at instructions that branch we know - # where they branch to. - labels = {} - insns = [] - - iseq.insns.each do |insn| - case insn - when Instruction - insns << insn - when InstructionSequence::Label - labels[insn] = insns.length + # This class is responsible for creating a control flow graph from the + # given instruction sequence. + class Compiler + attr_reader :iseq, :labels, :insns + + def initialize(iseq) + @iseq = iseq + + # We need to find all of the instructions that immediately follow + # labels so that when we are looking at instructions that branch we + # know where they branch to. 
+ @labels = {} + @insns = [] + + iseq.insns.each do |insn| + case insn + when Instruction + @insns << insn + when InstructionSequence::Label + @labels[insn] = @insns.length + end end end - # Now we need to find the indices of the instructions that start a basic - # block because they're either: + # This method is used to compile the instruction sequence into a control + # flow graph. It returns an instance of ControlFlowGraph. + def compile + blocks = connect_basic_blocks(build_basic_blocks) + ControlFlowGraph.new(iseq, insns, blocks.values).tap(&:verify) + end + + private + + # Finds the indices of the instructions that start a basic block because + # they're either: # # * the start of an instruction sequence # * the target of a branch # * fallen through to from a branch # - block_starts = Set.new([0]) - - insns.each_with_index do |insn, index| - if insn.branches? - block_starts.add(labels[insn.label]) if insn.respond_to?(:label) - block_starts.add(index + 1) if insn.falls_through? + def find_basic_block_starts + block_starts = Set.new([0]) + + insns.each_with_index do |insn, index| + if insn.branches? + block_starts.add(labels[insn.label]) if insn.respond_to?(:label) + block_starts.add(index + 1) if insn.falls_through? + end end + + block_starts.to_a.sort end - block_starts = block_starts.to_a.sort + # Builds up a set of basic blocks by iterating over the starts of each + # block. They are keyed by the index of their first instruction. + def build_basic_blocks + block_starts = find_basic_block_starts + blocks = {} - # Now we can build up a set of basic blocks by iterating over the starts - # of each block. They are keyed by the index of their first instruction. - blocks = {} - block_starts.each_with_index do |block_start, block_index| - block_stop = (block_starts[(block_index + 1)..] + [insns.length]).min + block_starts.each_with_index.to_h do |block_start, block_index| + block_end = (block_starts[(block_index + 1)..] 
+ [insns.length]).min + block_insns = insns[block_start...block_end] - blocks[block_start] = - BasicBlock.new(block_start, insns[block_start...block_stop]) + [block_start, BasicBlock.new(block_start, block_insns)] + end end # Now we need to connect the blocks by letting them know which blocks # precede them and which blocks follow them. - blocks.each do |block_start, block| - insn = block.last + def connect_basic_blocks(blocks) + blocks.each do |block_start, block| + insn = block.last - if insn.branches? && insn.respond_to?(:label) - block.succs << blocks.fetch(labels[insn.label]) - end + if insn.branches? && insn.respond_to?(:label) + block.succs << blocks.fetch(labels[insn.label]) + end - if (!insn.branches? && !insn.leaves?) || insn.falls_through? - block.succs << blocks.fetch(block_start + block.insns.length) - end + if (!insn.branches? && !insn.leaves?) || insn.falls_through? + block.succs << blocks.fetch(block_start + block.insns.length) + end - block.succs.each { |succ| succ.preds << block } + block.succs.each { |succ| succ.preds << block } + end end + end - # Here we're going to verify that we set up the control flow graph - # correctly. To do so we will assert that the only instruction in any - # given block that branches is the last instruction in the block. - blocks.each_value do |block| - block.insns[0...-1].each { |insn| raise if insn.branches? } - end + # This is the instruction sequence that this control flow graph + # corresponds to. + attr_reader :iseq + + # This is the list of instructions that this control flow graph contains. + # It is effectively the same as the list of instructions in the + # instruction sequence but with line numbers and events filtered out. + attr_reader :insns + + # This is the set of basic blocks that this control-flow graph contains. + attr_reader :blocks - # Finally we can return a new control flow graph with the given - # instruction sequence and our set of basic blocks. 
- new(iseq, insns, blocks.values) + def initialize(iseq, insns, blocks) + @iseq = iseq + @insns = insns + @blocks = blocks end def disasm @@ -156,6 +187,17 @@ def disasm output.string end + + # This method is used to verify that the control flow graph is well + # formed. It does this by checking that each basic block is itself well + # formed. + def verify + blocks.each(&:verify) + end + + def self.compile(iseq) + Compiler.new(iseq).compile + end end end end diff --git a/test/yarv_test.rb b/test/yarv_test.rb index c4c4c3bd..e37afb63 100644 --- a/test/yarv_test.rb +++ b/test/yarv_test.rb @@ -297,6 +297,69 @@ def value end end + def test_cfg + iseq = RubyVM::InstructionSequence.compile("100 + (14 < 0 ? -1 : +1)") + iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) + cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) + + assert_equal(<<~CFG, cfg.disasm) + == cfg + block_0 + putobject 100 + putobject 14 + putobject_INT2FIX_0_ + opt_lt + branchunless 13 + # to: block_7, block_5 + block_5 # from: block_0 + putobject -1 + jump 14 + # to: block_8 + block_7 # from: block_0 + putobject_INT2FIX_1_ + # to: block_8 + block_8 # from: block_5, block_7 + opt_plus + leave + # to: leaves + CFG + end + + def test_dfg + iseq = RubyVM::InstructionSequence.compile("100 + (14 < 0 ? 
-1 : +1)") + iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) + cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) + dfg = SyntaxTree::YARV::DataFlowGraph.compile(cfg) + + assert_equal(<<~DFG, dfg.disasm) + == dfg + block_0 + putobject 100 # out: out_0 + putobject 14 # out: 3 + putobject_INT2FIX_0_ # out: 3 + opt_lt # in: 1, 2; out: 4 + branchunless 13 # in: 3 + # to: block_7, block_5 + # out: 0 + block_5 # from: block_0 + # in: pass_0 + putobject -1 # out: out_0 + jump 14 + # to: block_8 + # out: pass_0, 5 + block_7 # from: block_0 + # in: pass_0 + putobject_INT2FIX_1_ # out: out_0 + # to: block_8 + # out: pass_0, 7 + block_8 # from: block_5, block_7 + # in: in_0, in_1 + opt_plus # in: in_0, in_1; out: 9 + leave # in: 8 + # to: leaves + DFG + end + private def assert_decompiles(expected, source) From 7578736beb2f444a76f9ce60ca2181438922ef51 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Feb 2023 16:43:46 -0500 Subject: [PATCH 05/14] More moving around and documentation --- lib/syntax_tree/yarv/control_flow_graph.rb | 136 +++++++++++---------- lib/syntax_tree/yarv/data_flow_graph.rb | 18 +-- 2 files changed, 78 insertions(+), 76 deletions(-) diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb index cd8a8324..fa9823f1 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -14,6 +14,64 @@ module YARV # cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) # class ControlFlowGraph + # This is the instruction sequence that this control flow graph + # corresponds to. + attr_reader :iseq + + # This is the list of instructions that this control flow graph contains. + # It is effectively the same as the list of instructions in the + # instruction sequence but with line numbers and events filtered out. + attr_reader :insns + + # This is the set of basic blocks that this control-flow graph contains. 
+ attr_reader :blocks + + def initialize(iseq, insns, blocks) + @iseq = iseq + @insns = insns + @blocks = blocks + end + + def disasm + fmt = Disassembler.new + output = StringIO.new + output.puts "== cfg #{iseq.name}" + + blocks.each do |block| + output.print(block.id) + + unless block.predecessors.empty? + output.print(" # from: #{block.predecessors.map(&:id).join(", ")}") + end + + output.puts + + block.insns.each do |insn| + output.print(" ") + output.puts(insn.disasm(fmt)) + end + + successors = block.successors.map(&:id) + successors << "leaves" if block.last.leaves? + output.print(" # to: #{successors.join(", ")}") unless successors.empty? + + output.puts + end + + output.string + end + + # This method is used to verify that the control flow graph is well + # formed. It does this by checking that each basic block is itself well + # formed. + def verify + blocks.each(&:verify) + end + + def self.compile(iseq) + Compiler.new(iseq).compile + end + # This object represents a single basic block, wherein all contained # instructions do not branch except for the last one. class BasicBlock @@ -28,10 +86,10 @@ class BasicBlock attr_reader :insns # This is an array of basic blocks that are predecessors to this block. - attr_reader :preds + attr_reader :predecessors # This is an array of basic blocks that are successors to this block. - attr_reader :succs + attr_reader :successors def initialize(block_start, insns) @id = "block_#{block_start}" @@ -39,8 +97,8 @@ def initialize(block_start, insns) @block_start = block_start @insns = insns - @preds = [] - @succs = [] + @predecessors = [] + @successors = [] end # This method is used to verify that the basic block is well formed. It @@ -122,81 +180,25 @@ def build_basic_blocks end end - # Now we need to connect the blocks by letting them know which blocks - # precede them and which blocks follow them. + # Connect the blocks by letting them know which blocks precede them and + # which blocks succeed them. 
def connect_basic_blocks(blocks) blocks.each do |block_start, block| insn = block.last if insn.branches? && insn.respond_to?(:label) - block.succs << blocks.fetch(labels[insn.label]) + block.successors << blocks.fetch(labels[insn.label]) end if (!insn.branches? && !insn.leaves?) || insn.falls_through? - block.succs << blocks.fetch(block_start + block.insns.length) + block.successors << blocks.fetch(block_start + block.insns.length) end - block.succs.each { |succ| succ.preds << block } - end - end - end - - # This is the instruction sequence that this control flow graph - # corresponds to. - attr_reader :iseq - - # This is the list of instructions that this control flow graph contains. - # It is effectively the same as the list of instructions in the - # instruction sequence but with line numbers and events filtered out. - attr_reader :insns - - # This is the set of basic blocks that this control-flow graph contains. - attr_reader :blocks - - def initialize(iseq, insns, blocks) - @iseq = iseq - @insns = insns - @blocks = blocks - end - - def disasm - fmt = Disassembler.new - output = StringIO.new - output.puts "== cfg #{iseq.name}" - - blocks.each do |block| - output.print(block.id) - - unless block.preds.empty? - output.print(" # from: #{block.preds.map(&:id).join(", ")}") - end - - output.puts - - block.insns.each do |insn| - output.print(" ") - output.puts(insn.disasm(fmt)) + block.successors.each do |successor| + successor.predecessors << block + end end - - succs = block.succs.map(&:id) - succs << "leaves" if block.last.leaves? - output.print(" # to: #{succs.join(", ")}") unless succs.empty? - - output.puts end - - output.string - end - - # This method is used to verify that the control flow graph is well - # formed. It does this by checking that each basic block is itself well - # formed. 
- def verify - blocks.each(&:verify) - end - - def self.compile(iseq) - Compiler.new(iseq).compile end end end diff --git a/lib/syntax_tree/yarv/data_flow_graph.rb b/lib/syntax_tree/yarv/data_flow_graph.rb index b028c521..13089dc7 100644 --- a/lib/syntax_tree/yarv/data_flow_graph.rb +++ b/lib/syntax_tree/yarv/data_flow_graph.rb @@ -108,7 +108,7 @@ def self.compile(cfg) until stack.empty? succ = stack.pop succ_flow = block_flows.fetch(succ.block_start) - succ.preds.each do |pred| + succ.predecessors.each do |pred| pred_flow = block_flows.fetch(pred.block_start) # Does a predecessor block have fewer outputs than the successor @@ -139,12 +139,12 @@ def self.compile(cfg) cfg.blocks.each do |pred| pred_flow = block_flows.fetch(pred.block_start) - if pred.succs.empty? + if pred.successors.empty? # With no successors, there should be no output arguments. raise unless pred_flow.out.empty? else # Check with successor... - pred.succs.each do |succ| + pred.successors.each do |succ| succ_flow = block_flows.fetch(succ.block_start) # The predecessor should have as many output arguments as the @@ -165,8 +165,8 @@ def disasm cfg.blocks.each do |block| output.print(block.id) - unless block.preds.empty? - output.print(" # from: #{block.preds.map(&:id).join(", ")}") + unless block.predecessors.empty? + output.print(" # from: #{block.predecessors.map(&:id).join(", ")}") end output.puts @@ -198,9 +198,9 @@ def disasm output.puts end - succs = block.succs.map(&:id) - succs << "leaves" if block.last.leaves? - output.puts(" # to: #{succs.join(", ")}") unless succs.empty? + successors = block.successors.map(&:id) + successors << "leaves" if block.last.leaves? + output.puts(" # to: #{successors.join(", ")}") unless successors.empty? unless block_flow.out.empty? 
output.puts " # out: #{block_flow.out.join(", ")}" @@ -211,4 +211,4 @@ def disasm end end end -end \ No newline at end of file +end From 7088c153057d92bbb03feb5120214fcfcdd553ea Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Feb 2023 16:57:23 -0500 Subject: [PATCH 06/14] Support multiple branch targets per instruction --- lib/syntax_tree/yarv/control_flow_graph.rb | 17 +++++++----- lib/syntax_tree/yarv/instructions.rb | 30 ++++++++++------------ lib/syntax_tree/yarv/legacy.rb | 4 +-- 3 files changed, 26 insertions(+), 25 deletions(-) diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb index fa9823f1..1d271768 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -105,7 +105,7 @@ def initialize(block_start, insns) # checks that the only instruction in this basic block that branches is # the last instruction. def verify - insns[0...-1].each { |insn| raise if insn.branches? } + insns[0...-1].each { |insn| raise unless insn.branch_targets.empty? } end def last @@ -157,8 +157,13 @@ def find_basic_block_starts block_starts = Set.new([0]) insns.each_with_index do |insn, index| - if insn.branches? - block_starts.add(labels[insn.label]) if insn.respond_to?(:label) + branch_targets = insn.branch_targets + + if branch_targets.any? + branch_targets.each do |branch_target| + block_starts.add(labels[branch_target]) + end + block_starts.add(index + 1) if insn.falls_through? end end @@ -186,11 +191,11 @@ def connect_basic_blocks(blocks) blocks.each do |block_start, block| insn = block.last - if insn.branches? && insn.respond_to?(:label) - block.successors << blocks.fetch(labels[insn.label]) + insn.branch_targets.each do |branch_target| + block.successors << blocks.fetch(labels[branch_target]) end - if (!insn.branches? && !insn.leaves?) || insn.falls_through? + if (insn.branch_targets.empty? && !insn.leaves?) || insn.falls_through? 
block.successors << blocks.fetch(block_start + block.insns.length) end diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index c387e763..97ccce15 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -90,9 +90,9 @@ def pops 0 end - # Whether or not this instruction is a branch instruction. - def branches? - false + # This returns an array of labels. + def branch_targets + [] end # Whether or not this instruction leaves the current frame. @@ -261,8 +261,8 @@ def call(vm) vm.jump(label) if vm.pop end - def branches? - true + def branch_targets + [label] end def falls_through? @@ -322,8 +322,8 @@ def call(vm) vm.jump(label) if vm.pop.nil? end - def branches? - true + def branch_targets + [label] end def falls_through? @@ -382,8 +382,8 @@ def call(vm) vm.jump(label) unless vm.pop end - def branches? - true + def branch_targets + [label] end def falls_through? @@ -2237,8 +2237,8 @@ def call(vm) vm.jump(label) end - def branches? - true + def branch_targets + [label] end end @@ -2283,10 +2283,6 @@ def call(vm) vm.leave end - def branches? - true - end - def leaves? true end @@ -2998,8 +2994,8 @@ def call(vm) vm.jump(case_dispatch_hash.fetch(vm.pop, else_label)) end - def branches? - true + def branch_targets + case_dispatch_hash.values.push(else_label) end def falls_through? diff --git a/lib/syntax_tree/yarv/legacy.rb b/lib/syntax_tree/yarv/legacy.rb index 8e12ff16..e20729d9 100644 --- a/lib/syntax_tree/yarv/legacy.rb +++ b/lib/syntax_tree/yarv/legacy.rb @@ -115,8 +115,8 @@ def call(vm) vm.push(nil) end - def branches? - true + def branch_targets + [label] end def falls_through? 
From b8dc90189aeb476913d8e12f2304b7223f5ccba9 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Feb 2023 16:58:34 -0500 Subject: [PATCH 07/14] Remove BasicBlock.last --- lib/syntax_tree/yarv/control_flow_graph.rb | 8 ++------ lib/syntax_tree/yarv/data_flow_graph.rb | 2 +- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb index 1d271768..1761127c 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -52,7 +52,7 @@ def disasm end successors = block.successors.map(&:id) - successors << "leaves" if block.last.leaves? + successors << "leaves" if block.insns.last.leaves? output.print(" # to: #{successors.join(", ")}") unless successors.empty? output.puts @@ -107,10 +107,6 @@ def initialize(block_start, insns) def verify insns[0...-1].each { |insn| raise unless insn.branch_targets.empty? } end - - def last - insns.last - end end # This class is responsible for creating a control flow graph from the @@ -189,7 +185,7 @@ def build_basic_blocks # which blocks succeed them. def connect_basic_blocks(blocks) blocks.each do |block_start, block| - insn = block.last + insn = block.insns.last insn.branch_targets.each do |branch_target| block.successors << blocks.fetch(labels[branch_target]) diff --git a/lib/syntax_tree/yarv/data_flow_graph.rb b/lib/syntax_tree/yarv/data_flow_graph.rb index 13089dc7..2af51883 100644 --- a/lib/syntax_tree/yarv/data_flow_graph.rb +++ b/lib/syntax_tree/yarv/data_flow_graph.rb @@ -199,7 +199,7 @@ def disasm end successors = block.successors.map(&:id) - successors << "leaves" if block.last.leaves? + successors << "leaves" if block.insns.last.leaves? output.puts(" # to: #{successors.join(", ")}") unless successors.empty? unless block_flow.out.empty? 
From 92cbfcae048c6867d0d5a6db5265591ed0b53076 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Feb 2023 17:03:49 -0500 Subject: [PATCH 08/14] Provide BasicBlock.each_with_index --- lib/syntax_tree/yarv/control_flow_graph.rb | 7 ++++++- lib/syntax_tree/yarv/data_flow_graph.rb | 20 ++++++++++---------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb index 1761127c..5b4b5605 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -101,6 +101,12 @@ def initialize(block_start, insns) @successors = [] end + # Yield each instruction in this basic block along with its index from + # the original instruction sequence. + def each_with_index(&block) + insns.each.with_index(block_start, &block) + end + # This method is used to verify that the basic block is well formed. It # checks that the only instruction in this basic block that branches is # the last instruction. @@ -171,7 +177,6 @@ def find_basic_block_starts # block. They are keyed by the index of their first instruction. def build_basic_blocks block_starts = find_basic_block_starts - blocks = {} block_starts.each_with_index.to_h do |block_start, block_index| block_end = (block_starts[(block_index + 1)..] + [insns.length]).min diff --git a/lib/syntax_tree/yarv/data_flow_graph.rb b/lib/syntax_tree/yarv/data_flow_graph.rb index 2af51883..295308bd 100644 --- a/lib/syntax_tree/yarv/data_flow_graph.rb +++ b/lib/syntax_tree/yarv/data_flow_graph.rb @@ -38,19 +38,19 @@ def self.compile(cfg) # blocks. block_flows = {} cfg.blocks.each do |block| - block_flows[block.block_start] = DataFlow.new + block_flows[block.id] = DataFlow.new end # Now, discover the data flow within each basic block. Using an abstract # stack, connect from consumers of data to the producers of that data. 
cfg.blocks.each do |block| - block_flow = block_flows.fetch(block.block_start) + block_flow = block_flows.fetch(block.id) stack = [] stack_initial_depth = 0 # Go through each instruction in the block... - block.insns.each.with_index(block.block_start) do |insn, index| + block.each_with_index do |insn, index| insn_flow = insn_flows[index] # How many values will be missing from the local stack to run this @@ -107,9 +107,9 @@ def self.compile(cfg) stack = [*cfg.blocks] until stack.empty? succ = stack.pop - succ_flow = block_flows.fetch(succ.block_start) + succ_flow = block_flows.fetch(succ.id) succ.predecessors.each do |pred| - pred_flow = block_flows.fetch(pred.block_start) + pred_flow = block_flows.fetch(pred.id) # Does a predecessor block have fewer outputs than the successor # has inputs? @@ -132,12 +132,12 @@ def self.compile(cfg) # Verify that we constructed the data flow graph correctly. Check that # the first block has no arguments. - raise unless block_flows.fetch(cfg.blocks.first.block_start).in.empty? + raise unless block_flows.fetch(cfg.blocks.first.id).in.empty? # Check all control flow edges between blocks pass the right number of # arguments. cfg.blocks.each do |pred| - pred_flow = block_flows.fetch(pred.block_start) + pred_flow = block_flows.fetch(pred.id) if pred.successors.empty? # With no successors, there should be no output arguments. @@ -145,7 +145,7 @@ def self.compile(cfg) else # Check with successor... pred.successors.each do |succ| - succ_flow = block_flows.fetch(succ.block_start) + succ_flow = block_flows.fetch(succ.id) # The predecessor should have as many output arguments as the # success has input arguments. @@ -170,12 +170,12 @@ def disasm end output.puts - block_flow = block_flows.fetch(block.block_start) + block_flow = block_flows.fetch(block.id) unless block_flow.in.empty? 
output.puts " # in: #{block_flow.in.join(", ")}" end - block.insns.each.with_index(block.block_start) do |insn, index| + block.each_with_index do |insn, index| output.print(" ") output.print(insn.disasm(fmt)) From 439ffb6336f9af6c2386c291bb529488c6d79d03 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Feb 2023 18:27:03 -0500 Subject: [PATCH 09/14] Refactor various graphs --- lib/syntax_tree.rb | 1 + lib/syntax_tree/yarv/basic_block.rb | 47 ++++ lib/syntax_tree/yarv/control_flow_graph.rb | 65 +---- lib/syntax_tree/yarv/data_flow_graph.rb | 296 +++++++++++---------- 4 files changed, 218 insertions(+), 191 deletions(-) create mode 100644 lib/syntax_tree/yarv/basic_block.rb diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index c6f1223b..e0e2a6be 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -29,6 +29,7 @@ require_relative "syntax_tree/index" require_relative "syntax_tree/yarv" +require_relative "syntax_tree/yarv/basic_block" require_relative "syntax_tree/yarv/bf" require_relative "syntax_tree/yarv/compiler" require_relative "syntax_tree/yarv/control_flow_graph" diff --git a/lib/syntax_tree/yarv/basic_block.rb b/lib/syntax_tree/yarv/basic_block.rb new file mode 100644 index 00000000..774a4c00 --- /dev/null +++ b/lib/syntax_tree/yarv/basic_block.rb @@ -0,0 +1,47 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This object represents a single basic block, wherein all contained + # instructions do not branch except for the last one. + class BasicBlock + # This is the unique identifier for this basic block. + attr_reader :id + + # This is the index into the list of instructions where this block starts. + attr_reader :block_start + + # This is the set of instructions that this block contains. + attr_reader :insns + + # This is an array of basic blocks that lead into this block. + attr_reader :incoming_blocks + + # This is an array of basic blocks that this block leads into. 
+ attr_reader :outgoing_blocks + + def initialize(block_start, insns) + @id = "block_#{block_start}" + + @block_start = block_start + @insns = insns + + @incoming_blocks = [] + @outgoing_blocks = [] + end + + # Yield each instruction in this basic block along with its index from the + # original instruction sequence. + def each_with_index(&block) + insns.each.with_index(block_start, &block) + end + + # This method is used to verify that the basic block is well formed. It + # checks that the only instruction in this basic block that branches is + # the last instruction. + def verify + insns[0...-1].each { |insn| raise unless insn.branch_targets.empty? } + end + end + end +end diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb index 5b4b5605..27df308e 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -40,8 +40,8 @@ def disasm blocks.each do |block| output.print(block.id) - unless block.predecessors.empty? - output.print(" # from: #{block.predecessors.map(&:id).join(", ")}") + unless block.incoming_blocks.empty? + output.print(" # from: #{block.incoming_blocks.map(&:id).join(", ")}") end output.puts @@ -51,9 +51,9 @@ def disasm output.puts(insn.disasm(fmt)) end - successors = block.successors.map(&:id) - successors << "leaves" if block.insns.last.leaves? - output.print(" # to: #{successors.join(", ")}") unless successors.empty? + dests = block.outgoing_blocks.map(&:id) + dests << "leaves" if block.insns.last.leaves? + output.print(" # to: #{dests.join(", ")}") unless dests.empty? output.puts end @@ -72,49 +72,6 @@ def self.compile(iseq) Compiler.new(iseq).compile end - # This object represents a single basic block, wherein all contained - # instructions do not branch except for the last one. - class BasicBlock - # This is the unique identifier for this basic block. 
- attr_reader :id - - # This is the index into the list of instructions where this block - # starts. - attr_reader :block_start - - # This is the set of instructions that this block contains. - attr_reader :insns - - # This is an array of basic blocks that are predecessors to this block. - attr_reader :predecessors - - # This is an array of basic blocks that are successors to this block. - attr_reader :successors - - def initialize(block_start, insns) - @id = "block_#{block_start}" - - @block_start = block_start - @insns = insns - - @predecessors = [] - @successors = [] - end - - # Yield each instruction in this basic block along with its index from - # the original instruction sequence. - def each_with_index(&block) - insns.each.with_index(block_start, &block) - end - - # This method is used to verify that the basic block is well formed. It - # checks that the only instruction in this basic block that branches is - # the last instruction. - def verify - insns[0...-1].each { |insn| raise unless insn.branch_targets.empty? } - end - end - # This class is responsible for creating a control flow graph from the # given instruction sequence. class Compiler @@ -186,22 +143,22 @@ def build_basic_blocks end end - # Connect the blocks by letting them know which blocks precede them and - # which blocks succeed them. + # Connect the blocks by letting them know which blocks are incoming and + # outgoing from each block. def connect_basic_blocks(blocks) blocks.each do |block_start, block| insn = block.insns.last insn.branch_targets.each do |branch_target| - block.successors << blocks.fetch(labels[branch_target]) + block.outgoing_blocks << blocks.fetch(labels[branch_target]) end if (insn.branch_targets.empty? && !insn.leaves?) || insn.falls_through? 
- block.successors << blocks.fetch(block_start + block.insns.length) + block.outgoing_blocks << blocks.fetch(block_start + block.insns.length) end - block.successors.each do |successor| - successor.predecessors << block + block.outgoing_blocks.each do |outgoing_block| + outgoing_block.incoming_blocks << block end end end diff --git a/lib/syntax_tree/yarv/data_flow_graph.rb b/lib/syntax_tree/yarv/data_flow_graph.rb index 295308bd..737518ce 100644 --- a/lib/syntax_tree/yarv/data_flow_graph.rb +++ b/lib/syntax_tree/yarv/data_flow_graph.rb @@ -26,138 +26,6 @@ def initialize(cfg, insn_flows, block_flows) @block_flows = block_flows end - def self.compile(cfg) - # First, create a data structure to encode data flow between - # instructions. - insn_flows = {} - cfg.insns.each_with_index do |insn, index| - insn_flows[index] = DataFlow.new - end - - # Next, create a data structure to encode data flow between basic - # blocks. - block_flows = {} - cfg.blocks.each do |block| - block_flows[block.id] = DataFlow.new - end - - # Now, discover the data flow within each basic block. Using an abstract - # stack, connect from consumers of data to the producers of that data. - cfg.blocks.each do |block| - block_flow = block_flows.fetch(block.id) - - stack = [] - stack_initial_depth = 0 - - # Go through each instruction in the block... - block.each_with_index do |insn, index| - insn_flow = insn_flows[index] - - # How many values will be missing from the local stack to run this - # instruction? - missing_stack_values = insn.pops - stack.size - - # For every value the instruction pops off the stack... - insn.pops.times do - # Was the value it pops off from another basic block? - if stack.empty? - # This is a basic block argument. - name = :"in_#{missing_stack_values - 1}" - - insn_flow.in.unshift(name) - block_flow.in.unshift(name) - - stack_initial_depth += 1 - missing_stack_values -= 1 - else - # Connect this consumer to the producer of the value. 
- insn_flow.in.unshift(stack.pop) - end - end - - # Record on our abstract stack that this instruction pushed - # this value onto the stack. - insn.pushes.times { stack << index } - end - - # Values that are left on the stack after going through all - # instructions are arguments to the basic block that we jump to. - stack.reverse_each.with_index do |producer, index| - block_flow.out << producer - insn_flows[producer].out << :"out_#{index}" - end - end - - # Go backwards and connect from producers to consumers. - cfg.insns.each_with_index do |insn, index| - # For every instruction that produced a value used in this - # instruction... - insn_flows[index].in.each do |producer| - # If it's actually another instruction and not a basic block - # argument... - if producer.is_a?(Integer) - # Record in the producing instruction that it produces a value - # used by this construction. - insn_flows[producer].out << index - end - end - end - - # Now, discover the data flow between basic blocks. - stack = [*cfg.blocks] - until stack.empty? - succ = stack.pop - succ_flow = block_flows.fetch(succ.id) - succ.predecessors.each do |pred| - pred_flow = block_flows.fetch(pred.id) - - # Does a predecessor block have fewer outputs than the successor - # has inputs? - if pred_flow.out.size < succ_flow.in.size - # If so then add arguments to pass data through from the - # predecessor's predecessors. - (succ_flow.in.size - pred_flow.out.size).times do |index| - name = :"pass_#{index}" - pred_flow.in.unshift(name) - pred_flow.out.unshift(name) - end - - # Since we modified the predecessor, add it back to the worklist - # so it'll be considered as a successor again, and propogate the - # global data flow back up the control flow graph. - stack << pred - end - end - end - - # Verify that we constructed the data flow graph correctly. Check that - # the first block has no arguments. - raise unless block_flows.fetch(cfg.blocks.first.id).in.empty? 
- - # Check all control flow edges between blocks pass the right number of - # arguments. - cfg.blocks.each do |pred| - pred_flow = block_flows.fetch(pred.id) - - if pred.successors.empty? - # With no successors, there should be no output arguments. - raise unless pred_flow.out.empty? - else - # Check with successor... - pred.successors.each do |succ| - succ_flow = block_flows.fetch(succ.id) - - # The predecessor should have as many output arguments as the - # success has input arguments. - raise unless pred_flow.out.size == succ_flow.in.size - end - end - end - - # Finally we can return the data flow graph. - new(cfg, insn_flows, block_flows) - end - def disasm fmt = Disassembler.new output = StringIO.new @@ -165,8 +33,9 @@ def disasm cfg.blocks.each do |block| output.print(block.id) - unless block.predecessors.empty? - output.print(" # from: #{block.predecessors.map(&:id).join(", ")}") + unless block.incoming_blocks.empty? + srcs = block.incoming_blocks.map(&:id) + output.print(" # from: #{srcs.join(", ")}") end output.puts @@ -198,9 +67,9 @@ def disasm output.puts end - successors = block.successors.map(&:id) - successors << "leaves" if block.insns.last.leaves? - output.puts(" # to: #{successors.join(", ")}") unless successors.empty? + dests = block.outgoing_blocks.map(&:id) + dests << "leaves" if block.insns.last.leaves? + output.puts(" # to: #{dests.join(", ")}") unless dests.empty? unless block_flow.out.empty? output.puts " # out: #{block_flow.out.join(", ")}" @@ -209,6 +78,159 @@ def disasm output.string end + + # Verify that we constructed the data flow graph correctly. + def verify + # Check that the first block has no arguments. + raise unless block_flows.fetch(cfg.blocks.first.id).in.empty? + + # Check all control flow edges between blocks pass the right number of + # arguments. + cfg.blocks.each do |block| + block_flow = block_flows.fetch(block.id) + + if block.outgoing_blocks.empty? + # With no outgoing blocks, there should be no output arguments. 
+ raise unless block_flow.out.empty? + else + # Check with outgoing blocks... + block.outgoing_blocks.each do |outgoing_block| + outgoing_flow = block_flows.fetch(outgoing_block.id) + + # The block should have as many output arguments as the + # outgoing block has input arguments. + raise unless block_flow.out.size == outgoing_flow.in.size + end + end + end + end + + def self.compile(cfg) + Compiler.new(cfg).compile + end + + # This class is responsible for creating a data flow graph from the given + # control flow graph. + class Compiler + attr_reader :cfg, :insn_flows, :block_flows + + def initialize(cfg) + @cfg = cfg + + # This data structure will hold the data flow between instructions + # within individual basic blocks. + @insn_flows = {} + cfg.insns.each_with_index do |insn, index| + @insn_flows[index] = DataFlow.new + end + + # This data structure will hold the data flow between basic blocks. + @block_flows = {} + cfg.blocks.each do |block| + @block_flows[block.id] = DataFlow.new + end + end + + def compile + find_local_flow + find_global_flow + DataFlowGraph.new(cfg, insn_flows, block_flows).tap(&:verify) + end + + private + + # Find the data flow within each basic block. Using an abstract stack, + # connect from consumers of data to the producers of that data. + def find_local_flow + cfg.blocks.each do |block| + block_flow = block_flows.fetch(block.id) + stack = [] + + # Go through each instruction in the block... + block.each_with_index do |insn, index| + insn_flow = insn_flows[index] + + # How many values will be missing from the local stack to run this + # instruction? + missing = insn.pops - stack.size + + # For every value the instruction pops off the stack... + insn.pops.times do + # Was the value it pops off from another basic block? + if stack.empty? + # This is a basic block argument. + missing -= 1 + name = :"in_#{missing}" + + insn_flow.in.unshift(name) + block_flow.in.unshift(name) + else + # Connect this consumer to the producer of the value. 
+ insn_flow.in.unshift(stack.pop) + end + end + + # Record on our abstract stack that this instruction pushed + # this value onto the stack. + insn.pushes.times { stack << index } + end + + # Values that are left on the stack after going through all + # instructions are arguments to the basic block that we jump to. + stack.reverse_each.with_index do |producer, index| + block_flow.out << producer + insn_flows[producer].out << :"out_#{index}" + end + end + + # Go backwards and connect from producers to consumers. + cfg.insns.each_with_index do |insn, index| + # For every instruction that produced a value used in this + # instruction... + insn_flows[index].in.each do |producer| + # If it's actually another instruction and not a basic block + # argument... + if producer.is_a?(Integer) + # Record in the producing instruction that it produces a value + # used by this construction. + insn_flows[producer].out << index + end + end + end + end + + # Find the data that flows between basic blocks. + def find_global_flow + stack = [*cfg.blocks] + + until stack.empty? + block = stack.pop + block_flow = block_flows.fetch(block.id) + + block.incoming_blocks.each do |incoming_block| + incoming_flow = block_flows.fetch(incoming_block.id) + + # Does a predecessor block have fewer outputs than the successor + # has inputs? + if incoming_flow.out.size < block_flow.in.size + # If so then add arguments to pass data through from the + # incoming block's incoming blocks. + (block_flow.in.size - incoming_flow.out.size).times do |index| + name = :"pass_#{index}" + + incoming_flow.in.unshift(name) + incoming_flow.out.unshift(name) + end + + # Since we modified the incoming block, add it back to the stack + # so it'll be considered as an outgoing block again, and + # propogate the global data flow back up the control flow graph. 
+ stack << incoming_block + end + end + end + end + end end end end From f600b0694e2c64bb4c6ce7d0d29d60533fdc1ab6 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Feb 2023 20:07:06 -0500 Subject: [PATCH 10/14] Properly use the disassembler for the cfg --- lib/syntax_tree/yarv/control_flow_graph.rb | 22 ++-- lib/syntax_tree/yarv/disassembler.rb | 112 ++++++++++--------- lib/syntax_tree/yarv/instruction_sequence.rb | 7 +- test/yarv_test.rb | 20 ++-- 4 files changed, 82 insertions(+), 79 deletions(-) diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb index 27df308e..3b3f9b82 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -33,32 +33,28 @@ def initialize(iseq, insns, blocks) end def disasm - fmt = Disassembler.new - output = StringIO.new - output.puts "== cfg #{iseq.name}" + fmt = Disassembler.new(iseq) + fmt.output.puts "== cfg #{iseq.name}" blocks.each do |block| - output.print(block.id) + fmt.output.print(block.id) unless block.incoming_blocks.empty? - output.print(" # from: #{block.incoming_blocks.map(&:id).join(", ")}") + fmt.output.print(" # from: #{block.incoming_blocks.map(&:id).join(", ")}") end - output.puts + fmt.output.puts - block.insns.each do |insn| - output.print(" ") - output.puts(insn.disasm(fmt)) - end + fmt.with_prefix(" ") { fmt.format_insns!(block.insns) } dests = block.outgoing_blocks.map(&:id) dests << "leaves" if block.insns.last.leaves? - output.print(" # to: #{dests.join(", ")}") unless dests.empty? + fmt.output.print(" # to: #{dests.join(", ")}") unless dests.empty? 
- output.puts + fmt.output.puts end - output.string + fmt.string end # This method is used to verify that the control flow graph is well diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb index d303bcb7..0b445e02 100644 --- a/lib/syntax_tree/yarv/disassembler.rb +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -4,15 +4,16 @@ module SyntaxTree module YARV class Disassembler attr_reader :output, :queue + attr_reader :current_prefix attr_accessor :current_iseq - def initialize + def initialize(current_iseq = nil) @output = StringIO.new @queue = [] @current_prefix = "" - @current_iseq = nil + @current_iseq = current_iseq end ######################################################################## @@ -97,16 +98,69 @@ def object(value) end ######################################################################## - # Main entrypoint + # Entrypoints ######################################################################## + def string + output.string + end + def format! while (@current_iseq = queue.shift) output << "\n" if output.pos > 0 format_iseq(@current_iseq) end + end - output.string + def format_insns!(insns, length = 0) + events = [] + lines = [] + + insns.each do |insn| + case insn + when Integer + lines << insn + when Symbol + events << event(insn) + when InstructionSequence::Label + # skip + else + output << "#{current_prefix}%04d " % length + + disasm = insn.disasm(self) + output << disasm + + if lines.any? + output << " " * (65 - disasm.length) if disasm.length < 65 + elsif events.any? + output << " " * (39 - disasm.length) if disasm.length < 39 + end + + if lines.any? + output << "(%4d)" % lines.last + lines.clear + end + + if events.any? 
+ output << "[#{events.join}]" + events.clear + end + + output << "\n" + length += insn.length + end + end + end + + def with_prefix(value) + previous = @current_prefix + + begin + @current_prefix = value + yield + ensure + @current_prefix = previous + end end private @@ -157,55 +211,7 @@ def format_iseq(iseq) output << "#{current_prefix}#{locals.join(" ")}\n" end - length = 0 - events = [] - lines = [] - - iseq.insns.each do |insn| - case insn - when Integer - lines << insn - when Symbol - events << event(insn) - when InstructionSequence::Label - # skip - else - output << "#{current_prefix}%04d " % length - - disasm = insn.disasm(self) - output << disasm - - if lines.any? - output << " " * (65 - disasm.length) if disasm.length < 65 - elsif events.any? - output << " " * (39 - disasm.length) if disasm.length < 39 - end - - if lines.any? - output << "(%4d)" % lines.last - lines.clear - end - - if events.any? - output << "[#{events.join}]" - events.clear - end - - output << "\n" - length += insn.length - end - end - end - - def with_prefix(value) - previous = @current_prefix - - begin - @current_prefix = value - yield - ensure - @current_prefix = previous - end + format_insns!(iseq.insns) end end end diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 6aa7279e..1281eba4 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -270,9 +270,10 @@ def to_a end def disasm - disassembler = Disassembler.new - disassembler.enqueue(self) - disassembler.format! + fmt = Disassembler.new + fmt.enqueue(self) + fmt.format! 
+ fmt.string end # This method converts our linked list of instructions into a final array diff --git a/test/yarv_test.rb b/test/yarv_test.rb index e37afb63..91147dc3 100644 --- a/test/yarv_test.rb +++ b/test/yarv_test.rb @@ -305,22 +305,22 @@ def test_cfg assert_equal(<<~CFG, cfg.disasm) == cfg block_0 - putobject 100 - putobject 14 - putobject_INT2FIX_0_ - opt_lt - branchunless 13 + 0000 putobject 100 + 0002 putobject 14 + 0004 putobject_INT2FIX_0_ + 0005 opt_lt + 0007 branchunless 13 # to: block_7, block_5 block_5 # from: block_0 - putobject -1 - jump 14 + 0000 putobject -1 + 0002 jump 14 # to: block_8 block_7 # from: block_0 - putobject_INT2FIX_1_ + 0000 putobject_INT2FIX_1_ # to: block_8 block_8 # from: block_5, block_7 - opt_plus - leave + 0000 opt_plus + 0002 leave # to: leaves CFG end From d66c977eb37d7f01f3221fdc0bcde086e56e1b8e Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Feb 2023 20:43:29 -0500 Subject: [PATCH 11/14] Use length for offsets to make it more readable --- lib/syntax_tree/yarv/basic_block.rb | 10 +++- lib/syntax_tree/yarv/control_flow_graph.rb | 69 +++++++++++++--------- lib/syntax_tree/yarv/data_flow_graph.rb | 20 +++---- test/yarv_test.rb | 57 +++++++++--------- 4 files changed, 90 insertions(+), 66 deletions(-) diff --git a/lib/syntax_tree/yarv/basic_block.rb b/lib/syntax_tree/yarv/basic_block.rb index 774a4c00..6798a092 100644 --- a/lib/syntax_tree/yarv/basic_block.rb +++ b/lib/syntax_tree/yarv/basic_block.rb @@ -32,8 +32,14 @@ def initialize(block_start, insns) # Yield each instruction in this basic block along with its index from the # original instruction sequence. - def each_with_index(&block) - insns.each.with_index(block_start, &block) + def each_with_length + return enum_for(:each_with_length) unless block_given? + + length = block_start + insns.each do |insn| + yield insn, length + length += insn.length + end end # This method is used to verify that the basic block is well formed. 
It diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb index 3b3f9b82..bcf9f26e 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -34,24 +34,23 @@ def initialize(iseq, insns, blocks) def disasm fmt = Disassembler.new(iseq) - fmt.output.puts "== cfg #{iseq.name}" + fmt.output.print("== cfg: #:1 ") + fmt.output.puts("(#{iseq.line},0)-(#{iseq.line},0)>") blocks.each do |block| - fmt.output.print(block.id) - - unless block.incoming_blocks.empty? - fmt.output.print(" # from: #{block.incoming_blocks.map(&:id).join(", ")}") - end - - fmt.output.puts - - fmt.with_prefix(" ") { fmt.format_insns!(block.insns) } + fmt.output.puts(block.id) + fmt.with_prefix(" ") do + unless block.incoming_blocks.empty? + from = block.incoming_blocks.map(&:id).join(", ") + fmt.output.puts("#{fmt.current_prefix}== from: #{from}") + end - dests = block.outgoing_blocks.map(&:id) - dests << "leaves" if block.insns.last.leaves? - fmt.output.print(" # to: #{dests.join(", ")}") unless dests.empty? + fmt.format_insns!(block.insns, block.block_start) - fmt.output.puts + to = block.outgoing_blocks.map(&:id) + to << "leaves" if block.insns.last.leaves? + fmt.output.puts("#{fmt.current_prefix}== to: #{to.join(", ")}") + end end fmt.string @@ -71,23 +70,34 @@ def self.compile(iseq) # This class is responsible for creating a control flow graph from the # given instruction sequence. class Compiler - attr_reader :iseq, :labels, :insns + # This is the instruction sequence that is being compiled. + attr_reader :iseq + + # This is a hash of indices in the YARV instruction sequence that point + # to their corresponding instruction. + attr_reader :insns + + # This is a hash of labels that point to their corresponding index into + # the YARV instruction sequence. Note that this is not the same as the + # index into the list of instructions on the instruction sequence + # object. 
Instead, this is the index into the C array, so it includes + # operands. + attr_reader :labels def initialize(iseq) @iseq = iseq - # We need to find all of the instructions that immediately follow - # labels so that when we are looking at instructions that branch we - # know where they branch to. + @insns = {} @labels = {} - @insns = [] + length = 0 iseq.insns.each do |insn| case insn when Instruction - @insns << insn + @insns[length] = insn + length += insn.length when InstructionSequence::Label - @labels[insn] = @insns.length + @labels[insn] = length end end end @@ -111,7 +121,7 @@ def compile def find_basic_block_starts block_starts = Set.new([0]) - insns.each_with_index do |insn, index| + insns.each do |index, insn| branch_targets = insn.branch_targets if branch_targets.any? @@ -119,7 +129,7 @@ def find_basic_block_starts block_starts.add(labels[branch_target]) end - block_starts.add(index + 1) if insn.falls_through? + block_starts.add(index + insn.length) if insn.falls_through? end end @@ -131,10 +141,14 @@ def find_basic_block_starts def build_basic_blocks block_starts = find_basic_block_starts - block_starts.each_with_index.to_h do |block_start, block_index| - block_end = (block_starts[(block_index + 1)..] + [insns.length]).min - block_insns = insns[block_start...block_end] + length = 0 + blocks = + iseq.insns.grep(Instruction).slice_after do |insn| + length += insn.length + block_starts.include?(length) + end + block_starts.zip(blocks).to_h do |block_start, block_insns| [block_start, BasicBlock.new(block_start, block_insns)] end end @@ -150,7 +164,8 @@ def connect_basic_blocks(blocks) end if (insn.branch_targets.empty? && !insn.leaves?) || insn.falls_through? 
- block.outgoing_blocks << blocks.fetch(block_start + block.insns.length) + fall_through_start = block_start + block.insns.sum(&:length) + block.outgoing_blocks << blocks.fetch(fall_through_start) end block.outgoing_blocks.each do |outgoing_block| diff --git a/lib/syntax_tree/yarv/data_flow_graph.rb b/lib/syntax_tree/yarv/data_flow_graph.rb index 737518ce..670e0daf 100644 --- a/lib/syntax_tree/yarv/data_flow_graph.rb +++ b/lib/syntax_tree/yarv/data_flow_graph.rb @@ -44,11 +44,11 @@ def disasm output.puts " # in: #{block_flow.in.join(", ")}" end - block.each_with_index do |insn, index| + block.each_with_length do |insn, length| output.print(" ") output.print(insn.disasm(fmt)) - insn_flow = insn_flows[index] + insn_flow = insn_flows[length] if insn_flow.in.empty? && insn_flow.out.empty? output.puts next @@ -120,8 +120,8 @@ def initialize(cfg) # This data structure will hold the data flow between instructions # within individual basic blocks. @insn_flows = {} - cfg.insns.each_with_index do |insn, index| - @insn_flows[index] = DataFlow.new + cfg.insns.each_key do |length| + @insn_flows[length] = DataFlow.new end # This data structure will hold the data flow between basic blocks. @@ -147,8 +147,8 @@ def find_local_flow stack = [] # Go through each instruction in the block... - block.each_with_index do |insn, index| - insn_flow = insn_flows[index] + block.each_with_length do |insn, length| + insn_flow = insn_flows[length] # How many values will be missing from the local stack to run this # instruction? @@ -172,7 +172,7 @@ def find_local_flow # Record on our abstract stack that this instruction pushed # this value onto the stack. - insn.pushes.times { stack << index } + insn.pushes.times { stack << length } end # Values that are left on the stack after going through all @@ -184,16 +184,16 @@ def find_local_flow end # Go backwards and connect from producers to consumers. 
- cfg.insns.each_with_index do |insn, index| + cfg.insns.each_key do |length| # For every instruction that produced a value used in this # instruction... - insn_flows[index].in.each do |producer| + insn_flows[length].in.each do |producer| # If it's actually another instruction and not a basic block # argument... if producer.is_a?(Integer) # Record in the producing instruction that it produces a value # used by this construction. - insn_flows[producer].out << index + insn_flows[producer].out << length end end end diff --git a/test/yarv_test.rb b/test/yarv_test.rb index 91147dc3..7a998fa4 100644 --- a/test/yarv_test.rb +++ b/test/yarv_test.rb @@ -303,25 +303,28 @@ def test_cfg cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) assert_equal(<<~CFG, cfg.disasm) - == cfg + == cfg: #@:1 (1,0)-(1,0)> block_0 0000 putobject 100 0002 putobject 14 0004 putobject_INT2FIX_0_ 0005 opt_lt 0007 branchunless 13 - # to: block_7, block_5 - block_5 # from: block_0 - 0000 putobject -1 - 0002 jump 14 - # to: block_8 - block_7 # from: block_0 - 0000 putobject_INT2FIX_1_ - # to: block_8 - block_8 # from: block_5, block_7 - 0000 opt_plus - 0002 leave - # to: leaves + == to: block_13, block_9 + block_9 + == from: block_0 + 0009 putobject -1 + 0011 jump 14 + == to: block_14 + block_13 + == from: block_0 + 0013 putobject_INT2FIX_1_ + == to: block_14 + block_14 + == from: block_9, block_13 + 0014 opt_plus + 0016 leave + == to: leaves CFG end @@ -335,27 +338,27 @@ def test_dfg == dfg block_0 putobject 100 # out: out_0 - putobject 14 # out: 3 - putobject_INT2FIX_0_ # out: 3 - opt_lt # in: 1, 2; out: 4 - branchunless 13 # in: 3 - # to: block_7, block_5 + putobject 14 # out: 5 + putobject_INT2FIX_0_ # out: 5 + opt_lt # in: 2, 4; out: 7 + branchunless 13 # in: 5 + # to: block_13, block_9 # out: 0 - block_5 # from: block_0 + block_9 # from: block_0 # in: pass_0 putobject -1 # out: out_0 jump 14 - # to: block_8 - # out: pass_0, 5 - block_7 # from: block_0 + # to: block_14 + # out: pass_0, 9 + 
block_13 # from: block_0 # in: pass_0 putobject_INT2FIX_1_ # out: out_0 - # to: block_8 - # out: pass_0, 7 - block_8 # from: block_5, block_7 + # to: block_14 + # out: pass_0, 13 + block_14 # from: block_9, block_13 # in: in_0, in_1 - opt_plus # in: in_0, in_1; out: 9 - leave # in: 8 + opt_plus # in: in_0, in_1; out: 16 + leave # in: 14 # to: leaves DFG end From 7d1cf1ce3aba3bc1a1251637304f298cb9f84fae Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Feb 2023 20:55:33 -0500 Subject: [PATCH 12/14] Properly use disassembler for DFG --- lib/syntax_tree/yarv/control_flow_graph.rb | 3 +- lib/syntax_tree/yarv/data_flow_graph.rb | 68 +++++++++----------- lib/syntax_tree/yarv/disassembler.rb | 12 ++-- lib/syntax_tree/yarv/instruction_sequence.rb | 4 ++ test/yarv_test.rb | 51 ++++++++------- 5 files changed, 67 insertions(+), 71 deletions(-) diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb index bcf9f26e..ef779c54 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -34,8 +34,7 @@ def initialize(iseq, insns, blocks) def disasm fmt = Disassembler.new(iseq) - fmt.output.print("== cfg: #<ISeq:#{iseq.name}@<compiled>:1 ") - fmt.output.puts("(#{iseq.line},0)-(#{iseq.line},0)>") + fmt.output.puts("== cfg: #{iseq.inspect}") blocks.each do |block| fmt.output.puts(block.id) diff --git a/lib/syntax_tree/yarv/data_flow_graph.rb b/lib/syntax_tree/yarv/data_flow_graph.rb index 670e0daf..09ba84a4 100644 --- a/lib/syntax_tree/yarv/data_flow_graph.rb +++ b/lib/syntax_tree/yarv/data_flow_graph.rb @@ -27,56 +27,48 @@ def initialize(cfg, insn_flows, block_flows) end def disasm - fmt = Disassembler.new - output = StringIO.new - output.puts "== dfg #{cfg.iseq.name}" + fmt = Disassembler.new(cfg.iseq) + fmt.output.puts("== dfg: #{cfg.iseq.inspect}") cfg.blocks.each do |block| - output.print(block.id) - unless block.incoming_blocks.empty? 
- srcs = block.incoming_blocks.map(&:id) - output.print(" # from: #{srcs.join(", ")}") - end - output.puts - - block_flow = block_flows.fetch(block.id) - unless block_flow.in.empty? - output.puts " # in: #{block_flow.in.join(", ")}" - end - - block.each_with_length do |insn, length| - output.print(" ") - output.print(insn.disasm(fmt)) - - insn_flow = insn_flows[length] - if insn_flow.in.empty? && insn_flow.out.empty? - output.puts - next + fmt.output.puts(block.id) + fmt.with_prefix(" ") do + unless block.incoming_blocks.empty? + from = block.incoming_blocks.map(&:id).join(", ") + fmt.output.puts("#{fmt.current_prefix}== from: #{from}") end - output.print(" # ") - unless insn_flow.in.empty? - output.print("in: #{insn_flow.in.join(", ")}") - output.print("; ") unless insn_flow.out.empty? + block_flow = block_flows.fetch(block.id) + unless block_flow.in.empty? + fmt.output.puts("#{fmt.current_prefix}== in: #{block_flow.in.join(", ")}") end - unless insn_flow.out.empty? - output.print("out: #{insn_flow.out.join(", ")}") + fmt.format_insns!(block.insns, block.block_start) do |insn, length| + insn_flow = insn_flows[length] + next if insn_flow.in.empty? && insn_flow.out.empty? + + fmt.output.print(" # ") + unless insn_flow.in.empty? + fmt.output.print("in: #{insn_flow.in.join(", ")}") + fmt.output.print("; ") unless insn_flow.out.empty? + end + + unless insn_flow.out.empty? + fmt.output.print("out: #{insn_flow.out.join(", ")}") + end end - output.puts - end - - dests = block.outgoing_blocks.map(&:id) - dests << "leaves" if block.insns.last.leaves? - output.puts(" # to: #{dests.join(", ")}") unless dests.empty? + to = block.outgoing_blocks.map(&:id) + to << "leaves" if block.insns.last.leaves? + fmt.output.puts("#{fmt.current_prefix}== to: #{to.join(", ")}") - unless block_flow.out.empty? - output.puts " # out: #{block_flow.out.join(", ")}" + unless block_flow.out.empty? 
+ fmt.output.puts("#{fmt.current_prefix}== out: #{block_flow.out.join(", ")}") + end end end - output.string + fmt.string end # Verify that we constructed the data flow graph correctly. diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb index 0b445e02..8b86851e 100644 --- a/lib/syntax_tree/yarv/disassembler.rb +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -146,6 +146,10 @@ def format_insns!(insns, length = 0) events.clear end + # A hook here to allow for custom formatting of instructions after + # the main body has been processed. + yield insn, length if block_given? + output << "\n" length += insn.length end @@ -166,13 +170,7 @@ def with_prefix(value) private def format_iseq(iseq) - output << "#{current_prefix}== disasm: " - output << "#<ISeq:#{iseq.name}@<compiled>:1 " - - location = Location.fixed(line: iseq.line, char: 0, column: 0) - output << "(#{location.start_line},#{location.start_column})-" - output << "(#{location.end_line},#{location.end_column})" - output << "> " + output << "#{current_prefix}== disasm: #{iseq.inspect} " if iseq.catch_table.any? output << "(catch: TRUE)\n" diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 1281eba4..83453837 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -276,6 +276,10 @@ def disasm fmt.string end + def inspect + "#<ISeq:#{name}@<compiled>:1 (#{line},#{0})-(#{line},#{0})>" + end + # This method converts our linked list of instructions into a final array # and performs any other compilation steps necessary. def compile! 
diff --git a/test/yarv_test.rb b/test/yarv_test.rb index 7a998fa4..5ac37504 100644 --- a/test/yarv_test.rb +++ b/test/yarv_test.rb @@ -335,31 +335,34 @@ def test_dfg dfg = SyntaxTree::YARV::DataFlowGraph.compile(cfg) assert_equal(<<~DFG, dfg.disasm) - == dfg + == dfg: #<ISeq:<compiled>@<compiled>:1 (1,0)-(1,0)> block_0 - putobject 100 # out: out_0 - putobject 14 # out: 5 - putobject_INT2FIX_0_ # out: 5 - opt_lt # in: 2, 4; out: 7 - branchunless 13 # in: 5 - # to: block_13, block_9 - # out: 0 - block_9 # from: block_0 - # in: pass_0 - putobject -1 # out: out_0 - jump 14 - # to: block_14 - # out: pass_0, 9 - block_13 # from: block_0 - # in: pass_0 - putobject_INT2FIX_1_ # out: out_0 - # to: block_14 - # out: pass_0, 13 - block_14 # from: block_9, block_13 - # in: in_0, in_1 - opt_plus # in: in_0, in_1; out: 16 - leave # in: 14 - # to: leaves + 0000 putobject 100 # out: out_0 + 0002 putobject 14 # out: 5 + 0004 putobject_INT2FIX_0_ # out: 5 + 0005 opt_lt # in: 2, 4; out: 7 + 0007 branchunless 13 # in: 5 + == to: block_13, block_9 + == out: 0 + block_9 + == from: block_0 + == in: pass_0 + 0009 putobject -1 # out: out_0 + 0011 jump 14 + == to: block_14 + == out: pass_0, 9 + block_13 + == from: block_0 + == in: pass_0 + 0013 putobject_INT2FIX_1_ # out: out_0 + == to: block_14 + == out: pass_0, 13 + block_14 + == from: block_9, block_13 + == in: in_0, in_1 + 0014 opt_plus # in: in_0, in_1; out: 16 + 0016 leave # in: 14 + == to: leaves DFG end From 28c5a4ac92745c26590794366f014742bc02eebd Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Feb 2023 21:01:49 -0500 Subject: [PATCH 13/14] Various formatting for CFG and DFG --- lib/syntax_tree/yarv/control_flow_graph.rb | 30 +++++++------ lib/syntax_tree/yarv/data_flow_graph.rb | 45 +++++++++----------- lib/syntax_tree/yarv/disassembler.rb | 2 +- lib/syntax_tree/yarv/instruction_sequence.rb | 2 +- 4 files changed, 41 insertions(+), 38 deletions(-) diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb 
index ef779c54..fb8f97f3 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -38,17 +38,17 @@ def disasm blocks.each do |block| fmt.output.puts(block.id) - fmt.with_prefix(" ") do + fmt.with_prefix(" ") do |prefix| unless block.incoming_blocks.empty? - from = block.incoming_blocks.map(&:id).join(", ") - fmt.output.puts("#{fmt.current_prefix}== from: #{from}") + from = block.incoming_blocks.map(&:id) + fmt.output.puts("#{prefix}== from: #{from.join(", ")}") end fmt.format_insns!(block.insns, block.block_start) to = block.outgoing_blocks.map(&:id) to << "leaves" if block.insns.last.leaves? - fmt.output.puts("#{fmt.current_prefix}== to: #{to.join(", ")}") + fmt.output.puts("#{prefix}== to: #{to.join(", ")}") end end @@ -142,14 +142,19 @@ def build_basic_blocks length = 0 blocks = - iseq.insns.grep(Instruction).slice_after do |insn| - length += insn.length - block_starts.include?(length) - end + iseq + .insns + .grep(Instruction) + .slice_after do |insn| + length += insn.length + block_starts.include?(length) + end - block_starts.zip(blocks).to_h do |block_start, block_insns| - [block_start, BasicBlock.new(block_start, block_insns)] - end + block_starts + .zip(blocks) + .to_h do |block_start, block_insns| + [block_start, BasicBlock.new(block_start, block_insns)] + end end # Connect the blocks by letting them know which blocks are incoming and @@ -162,7 +167,8 @@ def connect_basic_blocks(blocks) block.outgoing_blocks << blocks.fetch(labels[branch_target]) end - if (insn.branch_targets.empty? && !insn.leaves?) || insn.falls_through? + if (insn.branch_targets.empty? && !insn.leaves?) || + insn.falls_through? 
fall_through_start = block_start + block.insns.sum(&:length) block.outgoing_blocks << blocks.fetch(fall_through_start) end diff --git a/lib/syntax_tree/yarv/data_flow_graph.rb b/lib/syntax_tree/yarv/data_flow_graph.rb index 09ba84a4..614d1233 100644 --- a/lib/syntax_tree/yarv/data_flow_graph.rb +++ b/lib/syntax_tree/yarv/data_flow_graph.rb @@ -32,27 +32,27 @@ def disasm cfg.blocks.each do |block| fmt.output.puts(block.id) - fmt.with_prefix(" ") do + fmt.with_prefix(" ") do |prefix| unless block.incoming_blocks.empty? - from = block.incoming_blocks.map(&:id).join(", ") - fmt.output.puts("#{fmt.current_prefix}== from: #{from}") + from = block.incoming_blocks.map(&:id) + fmt.output.puts("#{prefix}== from: #{from.join(", ")}") end block_flow = block_flows.fetch(block.id) unless block_flow.in.empty? - fmt.output.puts("#{fmt.current_prefix}== in: #{block_flow.in.join(", ")}") + fmt.output.puts("#{prefix}== in: #{block_flow.in.join(", ")}") end - fmt.format_insns!(block.insns, block.block_start) do |insn, length| + fmt.format_insns!(block.insns, block.block_start) do |_, length| insn_flow = insn_flows[length] next if insn_flow.in.empty? && insn_flow.out.empty? - + fmt.output.print(" # ") unless insn_flow.in.empty? fmt.output.print("in: #{insn_flow.in.join(", ")}") fmt.output.print("; ") unless insn_flow.out.empty? end - + unless insn_flow.out.empty? fmt.output.print("out: #{insn_flow.out.join(", ")}") end @@ -60,11 +60,11 @@ def disasm to = block.outgoing_blocks.map(&:id) to << "leaves" if block.insns.last.leaves? - fmt.output.puts("#{fmt.current_prefix}== to: #{to.join(", ")}") + fmt.output.puts("#{prefix}== to: #{to.join(", ")}") unless block_flow.out.empty? - fmt.output.puts("#{fmt.current_prefix}== out: #{block_flow.out.join(", ")}") - end + fmt.output.puts("#{prefix}== out: #{block_flow.out.join(", ")}") + end end end @@ -104,23 +104,20 @@ def self.compile(cfg) # This class is responsible for creating a data flow graph from the given # control flow graph. 
class Compiler - attr_reader :cfg, :insn_flows, :block_flows + # This is the control flow graph that is being compiled. + attr_reader :cfg - def initialize(cfg) - @cfg = cfg + # This data structure will hold the data flow between instructions + # within individual basic blocks. + attr_reader :insn_flows - # This data structure will hold the data flow between instructions - # within individual basic blocks. - @insn_flows = {} - cfg.insns.each_key do |length| - @insn_flows[length] = DataFlow.new - end + # This data structure will hold the data flow between basic blocks. + attr_reader :block_flows - # This data structure will hold the data flow between basic blocks. - @block_flows = {} - cfg.blocks.each do |block| - @block_flows[block.id] = DataFlow.new - end + def initialize(cfg) + @cfg = cfg + @insn_flows = cfg.insns.to_h { |length, _| [length, DataFlow.new] } + @block_flows = cfg.blocks.to_h { |block| [block.id, DataFlow.new] } end def compile diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb index 8b86851e..7756d125 100644 --- a/lib/syntax_tree/yarv/disassembler.rb +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -161,7 +161,7 @@ def with_prefix(value) begin @current_prefix = value - yield + yield value ensure @current_prefix = previous end diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 83453837..45fc6121 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -277,7 +277,7 @@ def disasm end def inspect - "#<ISeq:#{name}@<compiled>:1 (#{line},#{0})-(#{line},#{0})>" + "#<ISeq:#{name}@<compiled>:1 (#{line},0)-(#{line},0)>" end # This method converts our linked list of instructions into a final array From 5526f399e81e7ec418a4a667e7c86d0082de9b1f Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Feb 2023 21:13:00 -0500 Subject: [PATCH 14/14] Split out calldata into its own file --- lib/syntax_tree.rb | 1 + lib/syntax_tree/yarv/calldata.rb | 91 
++++++++++++++++++++++++++++ lib/syntax_tree/yarv/disassembler.rb | 25 +------- lib/syntax_tree/yarv/instructions.rb | 61 ------------------- 4 files changed, 93 insertions(+), 85 deletions(-) create mode 100644 lib/syntax_tree/yarv/calldata.rb diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index e0e2a6be..ade9ff5e 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -31,6 +31,7 @@ require_relative "syntax_tree/yarv" require_relative "syntax_tree/yarv/basic_block" require_relative "syntax_tree/yarv/bf" +require_relative "syntax_tree/yarv/calldata" require_relative "syntax_tree/yarv/compiler" require_relative "syntax_tree/yarv/control_flow_graph" require_relative "syntax_tree/yarv/data_flow_graph" diff --git a/lib/syntax_tree/yarv/calldata.rb b/lib/syntax_tree/yarv/calldata.rb new file mode 100644 index 00000000..fadea61b --- /dev/null +++ b/lib/syntax_tree/yarv/calldata.rb @@ -0,0 +1,91 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This is an operand to various YARV instructions that represents the + # information about a specific call site. 
+ class CallData + CALL_ARGS_SPLAT = 1 << 0 + CALL_ARGS_BLOCKARG = 1 << 1 + CALL_FCALL = 1 << 2 + CALL_VCALL = 1 << 3 + CALL_ARGS_SIMPLE = 1 << 4 + CALL_BLOCKISEQ = 1 << 5 + CALL_KWARG = 1 << 6 + CALL_KW_SPLAT = 1 << 7 + CALL_TAILCALL = 1 << 8 + CALL_SUPER = 1 << 9 + CALL_ZSUPER = 1 << 10 + CALL_OPT_SEND = 1 << 11 + CALL_KW_SPLAT_MUT = 1 << 12 + + attr_reader :method, :argc, :flags, :kw_arg + + def initialize( + method, + argc = 0, + flags = CallData::CALL_ARGS_SIMPLE, + kw_arg = nil + ) + @method = method + @argc = argc + @flags = flags + @kw_arg = kw_arg + end + + def flag?(mask) + (flags & mask) > 0 + end + + def to_h + result = { mid: method, flag: flags, orig_argc: argc } + result[:kw_arg] = kw_arg if kw_arg + result + end + + def inspect + names = [] + names << :ARGS_SPLAT if flag?(CALL_ARGS_SPLAT) + names << :ARGS_BLOCKARG if flag?(CALL_ARGS_BLOCKARG) + names << :FCALL if flag?(CALL_FCALL) + names << :VCALL if flag?(CALL_VCALL) + names << :ARGS_SIMPLE if flag?(CALL_ARGS_SIMPLE) + names << :BLOCKISEQ if flag?(CALL_BLOCKISEQ) + names << :KWARG if flag?(CALL_KWARG) + names << :KW_SPLAT if flag?(CALL_KW_SPLAT) + names << :TAILCALL if flag?(CALL_TAILCALL) + names << :SUPER if flag?(CALL_SUPER) + names << :ZSUPER if flag?(CALL_ZSUPER) + names << :OPT_SEND if flag?(CALL_OPT_SEND) + names << :KW_SPLAT_MUT if flag?(CALL_KW_SPLAT_MUT) + + parts = [] + parts << "mid:#{method}" if method + parts << "argc:#{argc}" + parts << "kw:[#{kw_arg.join(", ")}]" if kw_arg + parts << names.join("|") if names.any? + + "<calldata!#{parts.join(", ")}>" + end + + def self.from(serialized) + new( + serialized[:mid], + serialized[:orig_argc], + serialized[:flag], + serialized[:kw_arg] + ) + end + end + + # A convenience method for creating a CallData object. 
+ def self.calldata( + method, + argc = 0, + flags = CallData::CALL_ARGS_SIMPLE, + kw_arg = nil + ) + CallData.new(method, argc, flags, kw_arg) + end + end +end diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb index 7756d125..ad66d0bf 100644 --- a/lib/syntax_tree/yarv/disassembler.rb +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -21,30 +21,7 @@ def initialize(current_iseq = nil) ######################################################################## def calldata(value) - flag_names = [] - flag_names << :ARGS_SPLAT if value.flag?(CallData::CALL_ARGS_SPLAT) - if value.flag?(CallData::CALL_ARGS_BLOCKARG) - flag_names << :ARGS_BLOCKARG - end - flag_names << :FCALL if value.flag?(CallData::CALL_FCALL) - flag_names << :VCALL if value.flag?(CallData::CALL_VCALL) - flag_names << :ARGS_SIMPLE if value.flag?(CallData::CALL_ARGS_SIMPLE) - flag_names << :BLOCKISEQ if value.flag?(CallData::CALL_BLOCKISEQ) - flag_names << :KWARG if value.flag?(CallData::CALL_KWARG) - flag_names << :KW_SPLAT if value.flag?(CallData::CALL_KW_SPLAT) - flag_names << :TAILCALL if value.flag?(CallData::CALL_TAILCALL) - flag_names << :SUPER if value.flag?(CallData::CALL_SUPER) - flag_names << :ZSUPER if value.flag?(CallData::CALL_ZSUPER) - flag_names << :OPT_SEND if value.flag?(CallData::CALL_OPT_SEND) - flag_names << :KW_SPLAT_MUT if value.flag?(CallData::CALL_KW_SPLAT_MUT) - - parts = [] - parts << "mid:#{value.method}" if value.method - parts << "argc:#{value.argc}" - parts << "kw:[#{value.kw_arg.join(", ")}]" if value.kw_arg - parts << flag_names.join("|") if flag_names.any? 
- - "<calldata!#{parts.join(", ")}>" + value.inspect end def enqueue(iseq) diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index 97ccce15..9bd8f0cd 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -2,67 +2,6 @@ module SyntaxTree module YARV - # This is an operand to various YARV instructions that represents the - # information about a specific call site. - class CallData - CALL_ARGS_SPLAT = 1 << 0 - CALL_ARGS_BLOCKARG = 1 << 1 - CALL_FCALL = 1 << 2 - CALL_VCALL = 1 << 3 - CALL_ARGS_SIMPLE = 1 << 4 - CALL_BLOCKISEQ = 1 << 5 - CALL_KWARG = 1 << 6 - CALL_KW_SPLAT = 1 << 7 - CALL_TAILCALL = 1 << 8 - CALL_SUPER = 1 << 9 - CALL_ZSUPER = 1 << 10 - CALL_OPT_SEND = 1 << 11 - CALL_KW_SPLAT_MUT = 1 << 12 - - attr_reader :method, :argc, :flags, :kw_arg - - def initialize( - method, - argc = 0, - flags = CallData::CALL_ARGS_SIMPLE, - kw_arg = nil - ) - @method = method - @argc = argc - @flags = flags - @kw_arg = kw_arg - end - - def flag?(mask) - (flags & mask) > 0 - end - - def to_h - result = { mid: method, flag: flags, orig_argc: argc } - result[:kw_arg] = kw_arg if kw_arg - result - end - - def self.from(serialized) - new( - serialized[:mid], - serialized[:orig_argc], - serialized[:flag], - serialized[:kw_arg] - ) - end - end - - # A convenience method for creating a CallData object. - def self.calldata( - method, - argc = 0, - flags = CallData::CALL_ARGS_SIMPLE, - kw_arg = nil - ) - CallData.new(method, argc, flags, kw_arg) - end - # This is a base class for all YARV instructions. It provides a few # convenience methods for working with instructions. class Instruction