diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index 41a33a78..1357e95f 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -30,13 +30,13 @@ require_relative "syntax_tree/yarv" require_relative "syntax_tree/yarv/bf" require_relative "syntax_tree/yarv/compiler" -require_relative "syntax_tree/yarv/assembler" require_relative "syntax_tree/yarv/decompiler" require_relative "syntax_tree/yarv/disassembler" require_relative "syntax_tree/yarv/instruction_sequence" require_relative "syntax_tree/yarv/instructions" require_relative "syntax_tree/yarv/legacy" require_relative "syntax_tree/yarv/local_table" +require_relative "syntax_tree/yarv/assembler" # Syntax Tree is a suite of tools built on top of the internal CRuby parser. It # provides the ability to generate a syntax tree from source, as well as the diff --git a/lib/syntax_tree/yarv/assembler.rb b/lib/syntax_tree/yarv/assembler.rb index b5df37b8..c3a874e9 100644 --- a/lib/syntax_tree/yarv/assembler.rb +++ b/lib/syntax_tree/yarv/assembler.rb @@ -33,20 +33,37 @@ def initialize(filepath) def assemble iseq = InstructionSequence.new(:top, "
", nil, Location.default) - labels = {} + assemble_iseq(iseq, File.readlines(filepath, chomp: true)) + + iseq.compile! + iseq + end + + def self.assemble(filepath) + new(filepath).assemble + end + + private + + def assemble_iseq(iseq, lines) + labels = Hash.new { |hash, name| hash[name] = iseq.label } + line_index = 0 + + while line_index < lines.length + line = lines[line_index] + line_index += 1 - File.foreach(filepath, chomp: true) do |line| case line.strip - when "" - # skip over blank lines - next - when /^;/ - # skip over comments + when "", /^;/ + # skip over blank lines and comments next when /^(\w+):$/ # create labels - iseq.push(labels[$1] = iseq.label) + iseq.push(labels[$1]) next + when /^__END__/ + # skip over the rest of the file when we hit __END__ + return end insn, operands = line.split(" ", 2) @@ -56,6 +73,12 @@ def assemble iseq.adjuststack(parse_number(operands)) when "anytostring" iseq.anytostring + when "branchif" + iseq.branchif(labels[operands]) + when "branchnil" + iseq.branchnil(labels[operands]) + when "branchunless" + iseq.branchunless(labels[operands]) when "checkmatch" iseq.checkmatch(parse_number(operands)) when "checktype" @@ -64,84 +87,200 @@ def assemble iseq.concatarray when "concatstrings" iseq.concatstrings(parse_number(operands)) + when "defineclass" + body = parse_nested(lines[line_index..]) + line_index += body.length + + name_value, flags_value = operands.split(/,\s*/) + name = parse_symbol(name_value) + flags = parse_number(flags_value) + + class_iseq = iseq.class_child_iseq(name.to_s, Location.default) + assemble_iseq(class_iseq, body) + iseq.defineclass(name, class_iseq, flags) + when "definemethod" + body = parse_nested(lines[line_index..]) + line_index += body.length + + name = parse_symbol(operands) + method_iseq = iseq.method_child_iseq(name.to_s, Location.default) + assemble_iseq(method_iseq, body) + + iseq.definemethod(name, method_iseq) + when "definesmethod" + body = parse_nested(lines[line_index..]) + line_index += body.length + + name = parse_symbol(operands) + method_iseq = iseq.method_child_iseq(name.to_s, Location.default) + + assemble_iseq(method_iseq, body) + iseq.definesmethod(name, method_iseq) when "dup" iseq.dup when "dupn" iseq.dupn(parse_number(operands)) when "duparray" - object = parse(operands) - raise unless object.is_a?(Array) - - iseq.duparray(object) + iseq.duparray(parse_type(operands, Array)) when "duphash" - object = parse(operands) - raise unless object.is_a?(Hash) - - iseq.duphash(object) + iseq.duphash(parse_type(operands, Hash)) + when "expandarray" + number, flags = operands.split(/,\s*/) + iseq.expandarray(parse_number(number), parse_number(flags)) + when "getclassvariable" + iseq.getclassvariable(parse_symbol(operands)) + when "getconstant" + iseq.getconstant(parse_symbol(operands)) + when "getglobal" + iseq.getglobal(parse_symbol(operands)) when "getinstancevariable" - object = parse(operands) - raise unless object.is_a?(Symbol) + iseq.getinstancevariable(parse_symbol(operands)) + when "getlocal" + name_string, level_string = operands.split(/,\s*/) + name = name_string.to_sym + level = level_string&.to_i || 0 - iseq.getinstancevariable(object) + iseq.local_table.plain(name) + lookup = iseq.local_table.find(name, level) + iseq.getlocal(lookup.index, lookup.level) + when "getspecial" + key, type = operands.split(/,\s*/) + iseq.getspecial(parse_number(key), parse_number(type)) when "intern" iseq.intern + when "invokesuper" + cdata = + if operands + calldata(operands) + else + YARV.calldata( + nil, + 0, + CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE | + CallData::CALL_SUPER + ) + end + + block_iseq = + if lines[line_index].start_with?(" ") + body = parse_nested(lines[line_index..]) + line_index += body.length + + block_iseq = iseq.block_child_iseq(Location.default) + assemble_iseq(block_iseq, body) + block_iseq + end + + iseq.invokesuper(cdata, block_iseq) + when "jump" + iseq.jump(labels[operands]) when "leave" iseq.leave when "newarray" iseq.newarray(parse_number(operands)) + when "newarraykwsplat" + iseq.newarraykwsplat(parse_number(operands)) + when "newhash" + iseq.newhash(parse_number(operands)) when "newrange" - object = parse(operands) - raise if object != 0 && object != 1 - - iseq.newrange(operands.to_i) + iseq.newrange(parse_options(operands, [0, 1])) when "nop" iseq.nop when "objtostring" - iseq.objtostring( - YARV.calldata( - :to_s, - 0, - CallData::CALL_ARGS_SIMPLE | CallData::CALL_FCALL - ) - ) + iseq.objtostring(YARV.calldata(:to_s)) + when "once" + block_iseq = + if lines[line_index].start_with?(" ") + body = parse_nested(lines[line_index..]) + line_index += body.length + + block_iseq = iseq.block_child_iseq(Location.default) + assemble_iseq(block_iseq, body) + block_iseq + end + + iseq.once(block_iseq, iseq.inline_storage) when "opt_and" iseq.send(YARV.calldata(:&, 1)) when "opt_aref" iseq.send(YARV.calldata(:[], 1)) when "opt_aref_with" - object = parse(operands) - raise unless object.is_a?(String) + iseq.opt_aref_with(parse_string(operands), YARV.calldata(:[], 1)) + when "opt_aset" + iseq.send(YARV.calldata(:[]=, 2)) + when "opt_aset_with" + iseq.opt_aset_with(parse_string(operands), YARV.calldata(:[]=, 2)) + when "opt_case_dispatch" + cdhash_value, else_label_value = operands.split(/\s*\},\s*/) + cdhash_value.sub!(/\A\{/, "") + + pairs = + cdhash_value + .split(/\s*,\s*/) + .map! { |pair| pair.split(/\s*=>\s*/) } + + cdhash = pairs.to_h { |value, nm| [parse(value), labels[nm]] } + else_label = labels[else_label_value] - iseq.opt_aref_with(object, YARV.calldata(:[], 1)) + iseq.opt_case_dispatch(cdhash, else_label) when "opt_div" iseq.send(YARV.calldata(:/, 1)) when "opt_empty_p" - iseq.send( - YARV.calldata( - :empty?, - 0, - CallData::CALL_ARGS_SIMPLE | CallData::CALL_FCALL - ) - ) - when "opt_eqeq" + iseq.send(YARV.calldata(:empty?)) + when "opt_eq" iseq.send(YARV.calldata(:==, 1)) when "opt_ge" iseq.send(YARV.calldata(:>=, 1)) + when "opt_gt" + iseq.send(YARV.calldata(:>, 1)) when "opt_getconstant_path" - object = parse(operands) - raise unless object.is_a?(Array) - - iseq.opt_getconstant_path(object) + iseq.opt_getconstant_path(parse_type(operands, Array)) + when "opt_le" + iseq.send(YARV.calldata(:<=, 1)) + when "opt_length" + iseq.send(YARV.calldata(:length)) + when "opt_lt" + iseq.send(YARV.calldata(:<, 1)) when "opt_ltlt" iseq.send(YARV.calldata(:<<, 1)) when "opt_minus" iseq.send(YARV.calldata(:-, 1)) + when "opt_mod" + iseq.send(YARV.calldata(:%, 1)) when "opt_mult" iseq.send(YARV.calldata(:*, 1)) + when "opt_neq" + iseq.send(YARV.calldata(:!=, 1)) + when "opt_newarray_max" + iseq.newarray(parse_number(operands)) + iseq.send(YARV.calldata(:max)) + when "opt_newarray_min" + iseq.newarray(parse_number(operands)) + iseq.send(YARV.calldata(:min)) + when "opt_nil_p" + iseq.send(YARV.calldata(:nil?)) + when "opt_not" + iseq.send(YARV.calldata(:!)) when "opt_or" iseq.send(YARV.calldata(:|, 1)) when "opt_plus" iseq.send(YARV.calldata(:+, 1)) + when "opt_regexpmatch2" + iseq.send(YARV.calldata(:=~, 1)) + when "opt_reverse" + iseq.send(YARV.calldata(:reverse)) + when "opt_send_without_block" + iseq.send(calldata(operands)) + when "opt_size" + iseq.send(YARV.calldata(:size)) + when "opt_str_freeze" + iseq.putstring(parse_string(operands)) + iseq.send(YARV.calldata(:freeze)) + when "opt_str_uminus" + iseq.putstring(parse_string(operands)) + iseq.send(YARV.calldata(:-@)) + when "opt_succ" + iseq.send(YARV.calldata(:succ)) when "pop" iseq.pop when "putnil" @@ -150,38 +289,60 @@ def assemble iseq.putobject(parse(operands)) when "putself" iseq.putself + when "putspecialobject" + iseq.putspecialobject(parse_options(operands, [1, 2, 3])) when "putstring" - object = parse(operands) - raise unless object.is_a?(String) - - iseq.putstring(object) + iseq.putstring(parse_string(operands)) when "send" - iseq.send(calldata(operands)) - when "setinstancevariable" - object = parse(operands) - raise unless object.is_a?(Symbol) + block_iseq = + if lines[line_index].start_with?(" ") + body = parse_nested(lines[line_index..]) + line_index += body.length + + block_iseq = iseq.block_child_iseq(Location.default) + assemble_iseq(block_iseq, body) + block_iseq + end + + iseq.send(calldata(operands), block_iseq) + when "setconstant" + iseq.setconstant(parse_symbol(operands)) + when "setglobal" + iseq.setglobal(parse_symbol(operands)) + when "setlocal" + name_string, level_string = operands.split(/,\s*/) + name = name_string.to_sym + level = level_string&.to_i || 0 - iseq.setinstancevariable(object) + iseq.local_table.plain(name) + lookup = iseq.local_table.find(name, level) + iseq.setlocal(lookup.index, lookup.level) + when "setn" + iseq.setn(parse_number(operands)) + when "setclassvariable" + iseq.setclassvariable(parse_symbol(operands)) + when "setinstancevariable" + iseq.setinstancevariable(parse_symbol(operands)) + when "setspecial" + iseq.setspecial(parse_number(operands)) + when "splatarray" + iseq.splatarray(parse_options(operands, [true, false])) when "swap" iseq.swap + when "topn" + iseq.topn(parse_number(operands)) when "toregexp" options, length = operands.split(", ") iseq.toregexp(parse_number(options), parse_number(length)) + when "ARG_REQ" + iseq.argument_size += 1 + iseq.local_table.plain(operands.to_sym) else raise "Could not understand: #{line}" end end - - iseq.compile! - iseq end - def self.assemble(filepath) - new(filepath).assemble - end - - private - def parse(value) program = SyntaxTree.parse(value) raise if program.statements.body.length != 1 @@ -189,50 +350,52 @@ def parse(value) program.statements.body.first.accept(ObjectVisitor.new) end + def parse_options(value, options) + parse(value).tap { raise unless options.include?(_1) } + end + + def parse_type(value, type) + parse(value).tap { raise unless _1.is_a?(type) } + end + def parse_number(value) - object = parse(value) - raise unless object.is_a?(Integer) + parse_type(value, Integer) + end + + def parse_string(value) + parse_type(value, String) + end - object + def parse_symbol(value) + parse_type(value, Symbol) end + def parse_nested(lines) + body = lines.take_while { |line| line.match?(/^($|;| )/) } + body.map! { |line| line.delete_prefix!(" ") || +"" } + end + + CALLDATA_FLAGS = { + "ARGS_SPLAT" => CallData::CALL_ARGS_SPLAT, + "ARGS_BLOCKARG" => CallData::CALL_ARGS_BLOCKARG, + "FCALL" => CallData::CALL_FCALL, + "VCALL" => CallData::CALL_VCALL, + "ARGS_SIMPLE" => CallData::CALL_ARGS_SIMPLE, + "BLOCKISEQ" => CallData::CALL_BLOCKISEQ, + "KWARG" => CallData::CALL_KWARG, + "KW_SPLAT" => CallData::CALL_KW_SPLAT, + "TAILCALL" => CallData::CALL_TAILCALL, + "SUPER" => CallData::CALL_SUPER, + "ZSUPER" => CallData::CALL_ZSUPER, + "OPT_SEND" => CallData::CALL_OPT_SEND, + "KW_SPLAT_MUT" => CallData::CALL_KW_SPLAT_MUT + }.freeze + def calldata(value) message, argc_value, flags_value = value.split flags = if flags_value - flags_value - .split("|") - .map do |flag| - case flag - when "ARGS_SPLAT" - CallData::CALL_ARGS_SPLAT - when "ARGS_BLOCKARG" - CallData::CALL_ARGS_BLOCKARG - when "FCALL" - CallData::CALL_FCALL - when "VCALL" - CallData::CALL_VCALL - when "ARGS_SIMPLE" - CallData::CALL_ARGS_SIMPLE - when "BLOCKISEQ" - CallData::CALL_BLOCKISEQ - when "KWARG" - CallData::CALL_KWARG - when "KW_SPLAT" - CallData::CALL_KW_SPLAT - when "TAILCALL" - CallData::CALL_TAILCALL - when "SUPER" - CallData::CALL_SUPER - when "ZSUPER" - CallData::CALL_ZSUPER - when "OPT_SEND" - CallData::CALL_OPT_SEND - when "KW_SPLAT_MUT" - CallData::CALL_KW_SPLAT_MUT - end - end - .inject(:|) + flags_value.split("|").map(&CALLDATA_FLAGS).inject(:|) else CallData::CALL_ARGS_SIMPLE end diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index 496c2075..4af5d6f0 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -125,7 +125,7 @@ def self.compile(node) end def visit_array(node) - visit_all(node.contents.parts) + node.contents ? visit_all(node.contents.parts) : [] end def visit_bare_assoc_hash(node) diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 0f1eadd0..48305be6 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -220,13 +220,7 @@ def length def eval raise "Unsupported platform" if ISEQ_LOAD.nil? - compiled = to_a - - # Temporary hack until we get these working. - compiled[4][:node_id] = -1 - compiled[4][:node_ids] = [-1] * insns.length - - Fiddle.dlunwrap(ISEQ_LOAD.call(Fiddle.dlwrap(compiled), 0, nil)).eval + Fiddle.dlunwrap(ISEQ_LOAD.call(Fiddle.dlwrap(to_a), 0, nil)).eval end def to_a @@ -257,7 +251,9 @@ def to_a { arg_size: argument_size, local_size: local_table.size, - stack_max: stack.maximum_size + stack_max: stack.maximum_size, + node_id: -1, + node_ids: [-1] * insns.length }, name, "", diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index 772f1bb3..288edb16 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -2695,7 +2695,7 @@ def to_a(_iseq) [ :opt_case_dispatch, case_dispatch_hash.flat_map { |key, value| [key, value.name] }, - else_label + else_label.name ] end