From aeafc84aae49687ea2607dfad648a41132f913cb Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 14 Dec 2022 15:09:18 -0500 Subject: [PATCH 1/2] Rename YARV classes for consistency --- lib/syntax_tree.rb | 2 +- lib/syntax_tree/yarv/decompiler.rb | 254 ++++++++++++ lib/syntax_tree/yarv/disasm_formatter.rb | 211 ---------- lib/syntax_tree/yarv/disassembler.rb | 389 +++++++++---------- lib/syntax_tree/yarv/instruction_sequence.rb | 6 +- test/yarv_test.rb | 8 +- 6 files changed, 435 insertions(+), 435 deletions(-) create mode 100644 lib/syntax_tree/yarv/decompiler.rb delete mode 100644 lib/syntax_tree/yarv/disasm_formatter.rb diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index eadb485d..2e2d2a42 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -30,7 +30,7 @@ require_relative "syntax_tree/yarv" require_relative "syntax_tree/yarv/bf" require_relative "syntax_tree/yarv/compiler" -require_relative "syntax_tree/yarv/disasm_formatter" +require_relative "syntax_tree/yarv/decompiler" require_relative "syntax_tree/yarv/disassembler" require_relative "syntax_tree/yarv/instruction_sequence" require_relative "syntax_tree/yarv/instructions" diff --git a/lib/syntax_tree/yarv/decompiler.rb b/lib/syntax_tree/yarv/decompiler.rb new file mode 100644 index 00000000..a6a567fb --- /dev/null +++ b/lib/syntax_tree/yarv/decompiler.rb @@ -0,0 +1,254 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This class is responsible for taking a compiled instruction sequence and + # walking through it to generate equivalent Ruby code. + class Decompiler + # When we're decompiling, we use a looped case statement to emulate + # jumping around in the same way the virtual machine would. This class + # provides convenience methods for generating the AST nodes that have to + # do with that label. + class BlockLabel + include DSL + attr_reader :name + + def initialize(name) + @name = name + end + + def field + VarField(Ident(name)) + end + + def ref + VarRef(Ident(name)) + end + end + + include DSL + attr_reader :iseq, :block_label + + def initialize(iseq) + @iseq = iseq + @block_label = BlockLabel.new("__block_label") + end + + def to_ruby + Program(decompile(iseq)) + end + + private + + def node_for(value) + case value + when Integer + Int(value.to_s) + when Symbol + SymbolLiteral(Ident(value.to_s)) + end + end + + def decompile(iseq) + label = :label_0 + clauses = {} + clause = [] + + iseq.insns.each do |insn| + case insn + when InstructionSequence::Label + unless clause.last.is_a?(Next) + clause << Assign(block_label.field, node_for(insn.name)) + end + + clauses[label] = clause + clause = [] + label = insn.name + when BranchUnless + body = [ + Assign(block_label.field, node_for(insn.label.name)), + Next(Args([])) + ] + + clause << IfNode(clause.pop, Statements(body), nil) + when Dup + clause << clause.last + when DupHash + assocs = + insn.object.map do |key, value| + Assoc(node_for(key), node_for(value)) + end + + clause << HashLiteral(LBrace("{"), assocs) + when GetGlobal + clause << VarRef(GVar(insn.name.to_s)) + when GetLocalWC0 + local = iseq.local_table.locals[insn.index] + clause << VarRef(Ident(local.name.to_s)) + when Jump + clause << Assign(block_label.field, node_for(insn.label.name)) + clause << Next(Args([])) + when Leave + value = Args([clause.pop]) + clause << (iseq.type == :top ? Break(value) : ReturnNode(value)) + when OptAnd, OptDiv, OptEq, OptGE, OptGT, OptLE, OptLT, OptLTLT, + OptMinus, OptMod, OptMult, OptOr, OptPlus + left, right = clause.pop(2) + clause << Binary(left, insn.calldata.method, right) + when OptAref + collection, arg = clause.pop(2) + clause << ARef(collection, Args([arg])) + when OptAset + collection, arg, value = clause.pop(3) + + clause << if value.is_a?(Binary) && value.left.is_a?(ARef) && + collection === value.left.collection && + arg === value.left.index.parts[0] + OpAssign( + ARefField(collection, Args([arg])), + Op("#{value.operator}="), + value.right + ) + else + Assign(ARefField(collection, Args([arg])), value) + end + when OptNEq + left, right = clause.pop(2) + clause << Binary(left, :"!=", right) + when OptSendWithoutBlock + method = insn.calldata.method.to_s + argc = insn.calldata.argc + + if insn.calldata.flag?(CallData::CALL_FCALL) + if argc == 0 + clause.pop + clause << CallNode(nil, nil, Ident(method), Args([])) + elsif argc == 1 && method.end_with?("=") + _receiver, argument = clause.pop(2) + clause << Assign( + CallNode(nil, nil, Ident(method[0..-2]), nil), + argument + ) + else + _receiver, *arguments = clause.pop(argc + 1) + clause << CallNode( + nil, + nil, + Ident(method), + ArgParen(Args(arguments)) + ) + end + else + if argc == 0 + clause << CallNode(clause.pop, Period("."), Ident(method), nil) + elsif argc == 1 && method.end_with?("=") + receiver, argument = clause.pop(2) + clause << Assign( + CallNode(receiver, Period("."), Ident(method[0..-2]), nil), + argument + ) + else + receiver, *arguments = clause.pop(argc + 1) + clause << CallNode( + receiver, + Period("."), + Ident(method), + ArgParen(Args(arguments)) + ) + end + end + when PutObject + case insn.object + when Float + clause << FloatLiteral(insn.object.inspect) + when Integer + clause << Int(insn.object.inspect) + else + raise "Unknown object type: #{insn.object.class.name}" + end + when PutObjectInt2Fix0 + clause << Int("0") + when PutObjectInt2Fix1 + clause << Int("1") + when PutSelf + clause << VarRef(Kw("self")) + when SetGlobal + target = GVar(insn.name.to_s) + value = clause.pop + + clause << if value.is_a?(Binary) && VarRef(target) === value.left + OpAssign(VarField(target), Op("#{value.operator}="), value.right) + else + Assign(VarField(target), value) + end + when SetLocalWC0 + target = Ident(local_name(insn.index, 0)) + value = clause.pop + + clause << if value.is_a?(Binary) && VarRef(target) === value.left + OpAssign(VarField(target), Op("#{value.operator}="), value.right) + else + Assign(VarField(target), value) + end + else + raise "Unknown instruction #{insn}" + end + end + + # If there's only one clause, then we don't need a case statement, and + # we can just disassemble the first clause. + clauses[label] = clause + return Statements(clauses.values.first) if clauses.size == 1 + + # Here we're going to build up a big case statement that will handle all + # of the different labels. + current = nil + clauses.reverse_each do |current_label, current_clause| + current = + When( + Args([node_for(current_label)]), + Statements(current_clause), + current + ) + end + switch = Case(Kw("case"), block_label.ref, current) + + # Here we're going to make sure that any locals that were established in + # the label_0 block are initialized so that scoping rules work + # correctly. + stack = [] + locals = [block_label.name] + + clauses[:label_0].each do |node| + if node.is_a?(Assign) && node.target.is_a?(VarField) && + node.target.value.is_a?(Ident) + value = node.target.value.value + next if locals.include?(value) + + stack << Assign(node.target, VarRef(Kw("nil"))) + locals << value + end + end + + # Finally, we'll set up the initial label and loop the entire case + # statement. + stack << Assign(block_label.field, node_for(:label_0)) + stack << MethodAddBlock( + CallNode(nil, nil, Ident("loop"), Args([])), + BlockNode( + Kw("do"), + nil, + BodyStmt(Statements([switch]), nil, nil, nil, nil) + ) + ) + Statements(stack) + end + + def local_name(index, level) + current = iseq + level.times { current = current.parent_iseq } + current.local_table.locals[index].name.to_s + end + end + end +end diff --git a/lib/syntax_tree/yarv/disasm_formatter.rb b/lib/syntax_tree/yarv/disasm_formatter.rb deleted file mode 100644 index 566bc8fd..00000000 --- a/lib/syntax_tree/yarv/disasm_formatter.rb +++ /dev/null @@ -1,211 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - module YARV - class DisasmFormatter - attr_reader :output, :queue - attr_reader :current_prefix, :current_iseq - - def initialize - @output = StringIO.new - @queue = [] - - @current_prefix = "" - @current_iseq = nil - end - - ######################################################################## - # Helpers for various instructions - ######################################################################## - - def calldata(value) - flag_names = [] - flag_names << :ARGS_SPLAT if value.flag?(CallData::CALL_ARGS_SPLAT) - if value.flag?(CallData::CALL_ARGS_BLOCKARG) - flag_names << :ARGS_BLOCKARG - end - flag_names << :FCALL if value.flag?(CallData::CALL_FCALL) - flag_names << :VCALL if value.flag?(CallData::CALL_VCALL) - flag_names << :ARGS_SIMPLE if value.flag?(CallData::CALL_ARGS_SIMPLE) - flag_names << :BLOCKISEQ if value.flag?(CallData::CALL_BLOCKISEQ) - flag_names << :KWARG if value.flag?(CallData::CALL_KWARG) - flag_names << :KW_SPLAT if value.flag?(CallData::CALL_KW_SPLAT) - flag_names << :TAILCALL if value.flag?(CallData::CALL_TAILCALL) - flag_names << :SUPER if value.flag?(CallData::CALL_SUPER) - flag_names << :ZSUPER if value.flag?(CallData::CALL_ZSUPER) - flag_names << :OPT_SEND if value.flag?(CallData::CALL_OPT_SEND) - flag_names << :KW_SPLAT_MUT if value.flag?(CallData::CALL_KW_SPLAT_MUT) - - parts = [] - parts << "mid:#{value.method}" if value.method - parts << "argc:#{value.argc}" - parts << "kw:[#{value.kw_arg.join(", ")}]" if value.kw_arg - parts << flag_names.join("|") if flag_names.any? - - "" - end - - def enqueue(iseq) - queue << iseq - end - - def event(name) - case name - when :RUBY_EVENT_B_CALL - "Bc" - when :RUBY_EVENT_B_RETURN - "Br" - when :RUBY_EVENT_CALL - "Ca" - when :RUBY_EVENT_CLASS - "Cl" - when :RUBY_EVENT_END - "En" - when :RUBY_EVENT_LINE - "Li" - when :RUBY_EVENT_RETURN - "Re" - else - raise "Unknown event: #{name}" - end - end - - def inline_storage(cache) - "" - end - - def instruction(name, operands = []) - operands.empty? ? name : "%-38s %s" % [name, operands.join(", ")] - end - - def label(value) - value.name["label_".length..] - end - - def local(index, explicit: nil, implicit: nil) - current = current_iseq - (explicit || implicit).times { current = current.parent_iseq } - - value = "#{current.local_table.name_at(index)}@#{index}" - value << ", #{explicit}" if explicit - value - end - - def object(value) - value.inspect - end - - ######################################################################## - # Main entrypoint - ######################################################################## - - def format! - while (@current_iseq = queue.shift) - output << "\n" if output.pos > 0 - format_iseq(@current_iseq) - end - - output.string - end - - private - - def format_iseq(iseq) - output << "#{current_prefix}== disasm: " - output << "#:1 " - - location = iseq.location - output << "(#{location.start_line},#{location.start_column})-" - output << "(#{location.end_line},#{location.end_column})" - output << "> " - - if iseq.catch_table.any? - output << "(catch: TRUE)\n" - output << "#{current_prefix}== catch table\n" - - with_prefix("#{current_prefix}| ") do - iseq.catch_table.each do |entry| - case entry - when InstructionSequence::CatchBreak - output << "#{current_prefix}catch type: break\n" - format_iseq(entry.iseq) - when InstructionSequence::CatchNext - output << "#{current_prefix}catch type: next\n" - when InstructionSequence::CatchRedo - output << "#{current_prefix}catch type: redo\n" - when InstructionSequence::CatchRescue - output << "#{current_prefix}catch type: rescue\n" - format_iseq(entry.iseq) - end - end - end - - output << "#{current_prefix}|#{"-" * 72}\n" - else - output << "(catch: FALSE)\n" - end - - if (local_table = iseq.local_table) && !local_table.empty? - output << "#{current_prefix}local table (size: #{local_table.size})\n" - - locals = - local_table.locals.each_with_index.map do |local, index| - "[%2d] %s@%d" % [local_table.offset(index), local.name, index] - end - - output << "#{current_prefix}#{locals.join(" ")}\n" - end - - length = 0 - events = [] - lines = [] - - iseq.insns.each do |insn| - case insn - when Integer - lines << insn - when Symbol - events << event(insn) - when InstructionSequence::Label - # skip - else - output << "#{current_prefix}%04d " % length - - disasm = insn.disasm(self) - output << disasm - - if lines.any? - output << " " * (65 - disasm.length) if disasm.length < 65 - elsif events.any? - output << " " * (39 - disasm.length) if disasm.length < 39 - end - - if lines.any? - output << "(%4d)" % lines.last - lines.clear - end - - if events.any? - output << "[#{events.join}]" - events.clear - end - - output << "\n" - length += insn.length - end - end - end - - def with_prefix(value) - previous = @current_prefix - - begin - @current_prefix = value - yield - ensure - @current_prefix = previous - end - end - end - end -end diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb index af325c31..033b6d3d 100644 --- a/lib/syntax_tree/yarv/disassembler.rb +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -2,252 +2,209 @@ module SyntaxTree module YARV - # This class is responsible for taking a compiled instruction sequence and - # walking through it to generate equivalent Ruby code. class Disassembler - # When we're disassmebling, we use a looped case statement to emulate - # jumping around in the same way the virtual machine would. This class - # provides convenience methods for generating the AST nodes that have to - # do with that label. - class DisasmLabel - include DSL - attr_reader :name - - def initialize(name) - @name = name - end + attr_reader :output, :queue + attr_reader :current_prefix, :current_iseq + + def initialize + @output = StringIO.new + @queue = [] + + @current_prefix = "" + @current_iseq = nil + end + + ######################################################################## + # Helpers for various instructions + ######################################################################## - def field - VarField(Ident(name)) + def calldata(value) + flag_names = [] + flag_names << :ARGS_SPLAT if value.flag?(CallData::CALL_ARGS_SPLAT) + if value.flag?(CallData::CALL_ARGS_BLOCKARG) + flag_names << :ARGS_BLOCKARG end + flag_names << :FCALL if value.flag?(CallData::CALL_FCALL) + flag_names << :VCALL if value.flag?(CallData::CALL_VCALL) + flag_names << :ARGS_SIMPLE if value.flag?(CallData::CALL_ARGS_SIMPLE) + flag_names << :BLOCKISEQ if value.flag?(CallData::CALL_BLOCKISEQ) + flag_names << :KWARG if value.flag?(CallData::CALL_KWARG) + flag_names << :KW_SPLAT if value.flag?(CallData::CALL_KW_SPLAT) + flag_names << :TAILCALL if value.flag?(CallData::CALL_TAILCALL) + flag_names << :SUPER if value.flag?(CallData::CALL_SUPER) + flag_names << :ZSUPER if value.flag?(CallData::CALL_ZSUPER) + flag_names << :OPT_SEND if value.flag?(CallData::CALL_OPT_SEND) + flag_names << :KW_SPLAT_MUT if value.flag?(CallData::CALL_KW_SPLAT_MUT) + + parts = [] + parts << "mid:#{value.method}" if value.method + parts << "argc:#{value.argc}" + parts << "kw:[#{value.kw_arg.join(", ")}]" if value.kw_arg + parts << flag_names.join("|") if flag_names.any? + + "" + end - def ref - VarRef(Ident(name)) + def enqueue(iseq) + queue << iseq + end + + def event(name) + case name + when :RUBY_EVENT_B_CALL + "Bc" + when :RUBY_EVENT_B_RETURN + "Br" + when :RUBY_EVENT_CALL + "Ca" + when :RUBY_EVENT_CLASS + "Cl" + when :RUBY_EVENT_END + "En" + when :RUBY_EVENT_LINE + "Li" + when :RUBY_EVENT_RETURN + "Re" + else + raise "Unknown event: #{name}" end end - include DSL - attr_reader :iseq, :disasm_label + def inline_storage(cache) + "" + end - def initialize(iseq) - @iseq = iseq - @disasm_label = DisasmLabel.new("__disasm_label") + def instruction(name, operands = []) + operands.empty? ? name : "%-38s %s" % [name, operands.join(", ")] end - def to_ruby - Program(disassemble(iseq)) + def label(value) + value.name["label_".length..] end - private + def local(index, explicit: nil, implicit: nil) + current = current_iseq + (explicit || implicit).times { current = current.parent_iseq } + + value = "#{current.local_table.name_at(index)}@#{index}" + value << ", #{explicit}" if explicit + value + end - def node_for(value) - case value - when Integer - Int(value.to_s) - when Symbol - SymbolLiteral(Ident(value.to_s)) + def object(value) + value.inspect + end + + ######################################################################## + # Main entrypoint + ######################################################################## + + def format! + while (@current_iseq = queue.shift) + output << "\n" if output.pos > 0 + format_iseq(@current_iseq) end + + output.string end - def disassemble(iseq) - label = :label_0 - clauses = {} - clause = [] + private + + def format_iseq(iseq) + output << "#{current_prefix}== disasm: " + output << "#:1 " + + location = iseq.location + output << "(#{location.start_line},#{location.start_column})-" + output << "(#{location.end_line},#{location.end_column})" + output << "> " + + if iseq.catch_table.any? + output << "(catch: TRUE)\n" + output << "#{current_prefix}== catch table\n" + + with_prefix("#{current_prefix}| ") do + iseq.catch_table.each do |entry| + case entry + when InstructionSequence::CatchBreak + output << "#{current_prefix}catch type: break\n" + format_iseq(entry.iseq) + when InstructionSequence::CatchNext + output << "#{current_prefix}catch type: next\n" + when InstructionSequence::CatchRedo + output << "#{current_prefix}catch type: redo\n" + when InstructionSequence::CatchRescue + output << "#{current_prefix}catch type: rescue\n" + format_iseq(entry.iseq) + end + end + end + + output << "#{current_prefix}|#{"-" * 72}\n" + else + output << "(catch: FALSE)\n" + end + + if (local_table = iseq.local_table) && !local_table.empty? + output << "#{current_prefix}local table (size: #{local_table.size})\n" + + locals = + local_table.locals.each_with_index.map do |local, index| + "[%2d] %s@%d" % [local_table.offset(index), local.name, index] + end + + output << "#{current_prefix}#{locals.join(" ")}\n" + end + + length = 0 + events = [] + lines = [] iseq.insns.each do |insn| case insn + when Integer + lines << insn + when Symbol + events << event(insn) when InstructionSequence::Label - unless clause.last.is_a?(Next) - clause << Assign(disasm_label.field, node_for(insn.name)) - end + # skip + else + output << "#{current_prefix}%04d " % length - clauses[label] = clause - clause = [] - label = insn.name - when BranchUnless - body = [ - Assign(disasm_label.field, node_for(insn.label.name)), - Next(Args([])) - ] - - clause << IfNode(clause.pop, Statements(body), nil) - when Dup - clause << clause.last - when DupHash - assocs = - insn.object.map do |key, value| - Assoc(node_for(key), node_for(value)) - end + disasm = insn.disasm(self) + output << disasm - clause << HashLiteral(LBrace("{"), assocs) - when GetGlobal - clause << VarRef(GVar(insn.name.to_s)) - when GetLocalWC0 - local = iseq.local_table.locals[insn.index] - clause << VarRef(Ident(local.name.to_s)) - when Jump - clause << Assign(disasm_label.field, node_for(insn.label.name)) - clause << Next(Args([])) - when Leave - value = Args([clause.pop]) - clause << (iseq.type == :top ? Break(value) : ReturnNode(value)) - when OptAnd, OptDiv, OptEq, OptGE, OptGT, OptLE, OptLT, OptLTLT, - OptMinus, OptMod, OptMult, OptOr, OptPlus - left, right = clause.pop(2) - clause << Binary(left, insn.calldata.method, right) - when OptAref - collection, arg = clause.pop(2) - clause << ARef(collection, Args([arg])) - when OptAset - collection, arg, value = clause.pop(3) - - clause << if value.is_a?(Binary) && value.left.is_a?(ARef) && - collection === value.left.collection && - arg === value.left.index.parts[0] - OpAssign( - ARefField(collection, Args([arg])), - Op("#{value.operator}="), - value.right - ) - else - Assign(ARefField(collection, Args([arg])), value) - end - when OptNEq - left, right = clause.pop(2) - clause << Binary(left, :"!=", right) - when OptSendWithoutBlock - method = insn.calldata.method.to_s - argc = insn.calldata.argc - - if insn.calldata.flag?(CallData::CALL_FCALL) - if argc == 0 - clause.pop - clause << CallNode(nil, nil, Ident(method), Args([])) - elsif argc == 1 && method.end_with?("=") - _receiver, argument = clause.pop(2) - clause << Assign( - CallNode(nil, nil, Ident(method[0..-2]), nil), - argument - ) - else - _receiver, *arguments = clause.pop(argc + 1) - clause << CallNode( - nil, - nil, - Ident(method), - ArgParen(Args(arguments)) - ) - end - else - if argc == 0 - clause << CallNode(clause.pop, Period("."), Ident(method), nil) - elsif argc == 1 && method.end_with?("=") - receiver, argument = clause.pop(2) - clause << Assign( - CallNode(receiver, Period("."), Ident(method[0..-2]), nil), - argument - ) - else - receiver, *arguments = clause.pop(argc + 1) - clause << CallNode( - receiver, - Period("."), - Ident(method), - ArgParen(Args(arguments)) - ) - end - end - when PutObject - case insn.object - when Float - clause << FloatLiteral(insn.object.inspect) - when Integer - clause << Int(insn.object.inspect) - else - raise "Unknown object type: #{insn.object.class.name}" + if lines.any? + output << " " * (65 - disasm.length) if disasm.length < 65 + elsif events.any? + output << " " * (39 - disasm.length) if disasm.length < 39 end - when PutObjectInt2Fix0 - clause << Int("0") - when PutObjectInt2Fix1 - clause << Int("1") - when PutSelf - clause << VarRef(Kw("self")) - when SetGlobal - target = GVar(insn.name.to_s) - value = clause.pop - - clause << if value.is_a?(Binary) && VarRef(target) === value.left - OpAssign(VarField(target), Op("#{value.operator}="), value.right) - else - Assign(VarField(target), value) + + if lines.any? + output << "(%4d)" % lines.last + lines.clear end - when SetLocalWC0 - target = Ident(local_name(insn.index, 0)) - value = clause.pop - - clause << if value.is_a?(Binary) && VarRef(target) === value.left - OpAssign(VarField(target), Op("#{value.operator}="), value.right) - else - Assign(VarField(target), value) + + if events.any? + output << "[#{events.join}]" + events.clear end - else - raise "Unknown instruction #{insn}" - end - end - # If there's only one clause, then we don't need a case statement, and - # we can just disassemble the first clause. - clauses[label] = clause - return Statements(clauses.values.first) if clauses.size == 1 - - # Here we're going to build up a big case statement that will handle all - # of the different labels. - current = nil - clauses.reverse_each do |current_label, current_clause| - current = - When( - Args([node_for(current_label)]), - Statements(current_clause), - current - ) - end - switch = Case(Kw("case"), disasm_label.ref, current) - - # Here we're going to make sure that any locals that were established in - # the label_0 block are initialized so that scoping rules work - # correctly. - stack = [] - locals = [disasm_label.name] - - clauses[:label_0].each do |node| - if node.is_a?(Assign) && node.target.is_a?(VarField) && - node.target.value.is_a?(Ident) - value = node.target.value.value - next if locals.include?(value) - - stack << Assign(node.target, VarRef(Kw("nil"))) - locals << value + output << "\n" + length += insn.length end end - - # Finally, we'll set up the initial label and loop the entire case - # statement. - stack << Assign(disasm_label.field, node_for(:label_0)) - stack << MethodAddBlock( - CallNode(nil, nil, Ident("loop"), Args([])), - BlockNode( - Kw("do"), - nil, - BodyStmt(Statements([switch]), nil, nil, nil, nil) - ) - ) - Statements(stack) end - def local_name(index, level) - current = iseq - level.times { current = current.parent_iseq } - current.local_table.locals[index].name.to_s + def with_prefix(value) + previous = @current_prefix + + begin + @current_prefix = value + yield + ensure + @current_prefix = previous + end end end end diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index ee5390a1..93b5018e 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -272,9 +272,9 @@ def to_a end def disasm - formatter = DisasmFormatter.new - formatter.enqueue(self) - formatter.format! + disassembler = Disassembler.new + disassembler.enqueue(self) + disassembler.format! end # This method converts our linked list of instructions into a final array diff --git a/test/yarv_test.rb b/test/yarv_test.rb index 02514a93..f8e0ffdb 100644 --- a/test/yarv_test.rb +++ b/test/yarv_test.rb @@ -31,7 +31,7 @@ class YARVTest < Minitest::Test CASES.each do |source, expected| define_method("test_disassemble_#{source}") do - assert_disassembles(expected, source) + assert_decompiles(expected, source) end end @@ -41,13 +41,13 @@ def test_bf ">>.>---.+++++++..+++.>>.<-.<.+++.------.--------.>>+.>++." iseq = YARV::Bf.new(hello_world).compile - Formatter.format(hello_world, YARV::Disassembler.new(iseq).to_ruby) + Formatter.format(hello_world, YARV::Decompiler.new(iseq).to_ruby) end private - def assert_disassembles(expected, source) - ruby = YARV::Disassembler.new(YARV.compile(source)).to_ruby + def assert_decompiles(expected, source) + ruby = YARV::Decompiler.new(YARV.compile(source)).to_ruby actual = Formatter.format(source, ruby) assert_equal expected, actual end From 9d57b6a7b8592e4a00a5a1b90db89fa2988b45b1 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 16 Dec 2022 08:23:05 -0500 Subject: [PATCH 2/2] Assembler --- lib/syntax_tree.rb | 1 + lib/syntax_tree/yarv/assembler.rb | 244 +++++++++++++++++++ lib/syntax_tree/yarv/compiler.rb | 13 +- lib/syntax_tree/yarv/instruction_sequence.rb | 4 +- 4 files changed, 259 insertions(+), 3 deletions(-) create mode 100644 lib/syntax_tree/yarv/assembler.rb diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index 2e2d2a42..41a33a78 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -30,6 +30,7 @@ require_relative "syntax_tree/yarv" require_relative "syntax_tree/yarv/bf" require_relative "syntax_tree/yarv/compiler" +require_relative "syntax_tree/yarv/assembler" require_relative "syntax_tree/yarv/decompiler" require_relative "syntax_tree/yarv/disassembler" require_relative "syntax_tree/yarv/instruction_sequence" diff --git a/lib/syntax_tree/yarv/assembler.rb b/lib/syntax_tree/yarv/assembler.rb new file mode 100644 index 00000000..b5df37b8 --- /dev/null +++ b/lib/syntax_tree/yarv/assembler.rb @@ -0,0 +1,244 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + class Assembler + class ObjectVisitor < Compiler::RubyVisitor + def visit_dyna_symbol(node) + if node.parts.empty? + :"" + else + raise CompilationError + end + end + + def visit_string_literal(node) + case node.parts.length + when 0 + "" + when 1 + raise CompilationError unless node.parts.first.is_a?(TStringContent) + node.parts.first.value + else + raise CompilationError + end + end + end + + attr_reader :filepath + + def initialize(filepath) + @filepath = filepath + end + + def assemble + iseq = InstructionSequence.new(:top, "
", nil, Location.default) + labels = {} + + File.foreach(filepath, chomp: true) do |line| + case line.strip + when "" + # skip over blank lines + next + when /^;/ + # skip over comments + next + when /^(\w+):$/ + # create labels + iseq.push(labels[$1] = iseq.label) + next + end + + insn, operands = line.split(" ", 2) + + case insn + when "adjuststack" + iseq.adjuststack(parse_number(operands)) + when "anytostring" + iseq.anytostring + when "checkmatch" + iseq.checkmatch(parse_number(operands)) + when "checktype" + iseq.checktype(parse_number(operands)) + when "concatarray" + iseq.concatarray + when "concatstrings" + iseq.concatstrings(parse_number(operands)) + when "dup" + iseq.dup + when "dupn" + iseq.dupn(parse_number(operands)) + when "duparray" + object = parse(operands) + raise unless object.is_a?(Array) + + iseq.duparray(object) + when "duphash" + object = parse(operands) + raise unless object.is_a?(Hash) + + iseq.duphash(object) + when "getinstancevariable" + object = parse(operands) + raise unless object.is_a?(Symbol) + + iseq.getinstancevariable(object) + when "intern" + iseq.intern + when "leave" + iseq.leave + when "newarray" + iseq.newarray(parse_number(operands)) + when "newrange" + object = parse(operands) + raise if object != 0 && object != 1 + + iseq.newrange(operands.to_i) + when "nop" + iseq.nop + when "objtostring" + iseq.objtostring( + YARV.calldata( + :to_s, + 0, + CallData::CALL_ARGS_SIMPLE | CallData::CALL_FCALL + ) + ) + when "opt_and" + iseq.send(YARV.calldata(:&, 1)) + when "opt_aref" + iseq.send(YARV.calldata(:[], 1)) + when "opt_aref_with" + object = parse(operands) + raise unless object.is_a?(String) + + iseq.opt_aref_with(object, YARV.calldata(:[], 1)) + when "opt_div" + iseq.send(YARV.calldata(:/, 1)) + when "opt_empty_p" + iseq.send( + YARV.calldata( + :empty?, + 0, + CallData::CALL_ARGS_SIMPLE | CallData::CALL_FCALL + ) + ) + when "opt_eqeq" + iseq.send(YARV.calldata(:==, 1)) + when "opt_ge" + iseq.send(YARV.calldata(:>=, 1)) + when "opt_getconstant_path" + object = parse(operands) + raise unless object.is_a?(Array) + + iseq.opt_getconstant_path(object) + when "opt_ltlt" + iseq.send(YARV.calldata(:<<, 1)) + when "opt_minus" + iseq.send(YARV.calldata(:-, 1)) + when "opt_mult" + iseq.send(YARV.calldata(:*, 1)) + when "opt_or" + iseq.send(YARV.calldata(:|, 1)) + when "opt_plus" + iseq.send(YARV.calldata(:+, 1)) + when "pop" + iseq.pop + when "putnil" + iseq.putnil + when "putobject" + iseq.putobject(parse(operands)) + when "putself" + iseq.putself + when "putstring" + object = parse(operands) + raise unless object.is_a?(String) + + iseq.putstring(object) + when "send" + iseq.send(calldata(operands)) + when "setinstancevariable" + object = parse(operands) + raise unless object.is_a?(Symbol) + + iseq.setinstancevariable(object) + when "swap" + iseq.swap + when "toregexp" + options, length = operands.split(", ") + iseq.toregexp(parse_number(options), parse_number(length)) + else + raise "Could not understand: #{line}" + end + end + + iseq.compile! + iseq + end + + def self.assemble(filepath) + new(filepath).assemble + end + + private + + def parse(value) + program = SyntaxTree.parse(value) + raise if program.statements.body.length != 1 + + program.statements.body.first.accept(ObjectVisitor.new) + end + + def parse_number(value) + object = parse(value) + raise unless object.is_a?(Integer) + + object + end + + def calldata(value) + message, argc_value, flags_value = value.split + flags = + if flags_value + flags_value + .split("|") + .map do |flag| + case flag + when "ARGS_SPLAT" + CallData::CALL_ARGS_SPLAT + when "ARGS_BLOCKARG" + CallData::CALL_ARGS_BLOCKARG + when "FCALL" + CallData::CALL_FCALL + when "VCALL" + CallData::CALL_VCALL + when "ARGS_SIMPLE" + CallData::CALL_ARGS_SIMPLE + when "BLOCKISEQ" + CallData::CALL_BLOCKISEQ + when "KWARG" + CallData::CALL_KWARG + when "KW_SPLAT" + CallData::CALL_KW_SPLAT + when "TAILCALL" + CallData::CALL_TAILCALL + when "SUPER" + CallData::CALL_SUPER + when "ZSUPER" + CallData::CALL_ZSUPER + when "OPT_SEND" + CallData::CALL_OPT_SEND + when "KW_SPLAT_MUT" + CallData::CALL_KW_SPLAT_MUT + end + end + .inject(:|) + else + CallData::CALL_ARGS_SIMPLE + end + + YARV.calldata(message.to_sym, argc_value&.to_i || 0, flags) + end + end + end +end diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index 046fb438..4bb5d654 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -148,7 +148,18 @@ def visit_imaginary(node) end def visit_int(node) - node.value.to_i + case (value = node.value) + when /^0b/ + value[2..].to_i(2) + when /^0o/ + value[2..].to_i(8) + when /^0d/ + value[2..].to_i + when /^0x/ + value[2..].to_i(16) + else + value.to_i + end end def visit_label(node) diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 93b5018e..0f1eadd0 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -223,8 +223,8 @@ def eval compiled = to_a # Temporary hack until we get these working. - compiled[4][:node_id] = 11 - compiled[4][:node_ids] = [1, 0, 3, 2, 6, 7, 9, -1] + compiled[4][:node_id] = -1 + compiled[4][:node_ids] = [-1] * insns.length Fiddle.dlunwrap(ISEQ_LOAD.call(Fiddle.dlwrap(compiled), 0, nil)).eval end