|
| 1 | +# frozen_string_literal: true |
| 2 | + |
| 3 | +module SyntaxTree |
| 4 | + module YARV |
| 5 | + # This class is responsible for taking a compiled instruction sequence and |
| 6 | + # walking through it to generate equivalent Ruby code. |
| 7 | + class Decompiler |
| 8 | + # When we're decompiling, we use a looped case statement to emulate |
| 9 | + # jumping around in the same way the virtual machine would. This class |
| 10 | + # provides convenience methods for generating the AST nodes that have to |
| 11 | + # do with that label. |
class BlockLabel
  include DSL

  # The name of the local variable that holds the current label.
  attr_reader :name

  # name:: the identifier to use for the label-tracking local variable.
  def initialize(name)
    @name = name
  end

  # Builds the node used when the label variable is being assigned to.
  def field
    VarField(identifier)
  end

  # Builds the node used when the label variable is being read.
  def ref
    VarRef(identifier)
  end

  private

  # A fresh Ident node wrapping the label variable's name.
  def identifier
    Ident(name)
  end
end
| 28 | + |
| 29 | + include DSL |
| 30 | + attr_reader :iseq, :block_label |
| 31 | + |
# Sets up a decompiler for the given instruction sequence.
#
# iseq:: the compiled instruction sequence that #to_ruby will walk.
#
# The label-tracking variable used by the generated code is always named
# "__block_label".
def initialize(iseq)
  @block_label = BlockLabel.new("__block_label")
  @iseq = iseq
end
| 36 | + |
# Decompiles the instruction sequence given to the constructor and wraps
# the resulting statements in a Program node.
def to_ruby
  statements = decompile(iseq)
  Program(statements)
end
| 40 | + |
| 41 | + private |
| 42 | + |
# Builds the literal AST node that corresponds to a plain Ruby value.
#
# value:: an Integer or a Symbol.
#
# Returns an Int node for integers and a SymbolLiteral node for symbols.
# Raises a RuntimeError for any other kind of value. Previously nil was
# returned in that case, which silently produced a malformed tree.
def node_for(value)
  case value
  when Integer
    Int(value.to_s)
  when Symbol
    SymbolLiteral(Ident(value.to_s))
  else
    raise "Unknown object type: #{value.class.name}"
  end
end
| 51 | + |
# Translates the instructions of +iseq+ into a Statements node.
#
# Instructions are grouped into clauses keyed by the label that precedes
# them (the first clause is keyed :label_0). Each clause doubles as an
# operand stack: instructions that push values append nodes, instructions
# that consume values pop them and append the combined node.
#
# When there is only a single clause its nodes are returned directly.
# Otherwise every clause becomes a `when` branch of a `case` on the block
# label variable, and that `case` is wrapped in `loop do ... end` so that
# assigning the label and calling `next` emulates a jump.
#
# iseq:: the instruction sequence to walk; +insns+, +local_table+ and
#        +type+ are read from it.
#
# Raises a RuntimeError for an instruction, or a PutObject operand, that
# is not handled here.
def decompile(iseq)
  label = :label_0
  clauses = {}
  clause = []

  iseq.insns.each do |insn|
    case insn
    when InstructionSequence::Label
      # Falling off the end of a clause continues into the next label,
      # unless the clause already ended with an explicit jump.
      unless clause.last.is_a?(Next)
        clause << Assign(block_label.field, node_for(insn.name))
      end

      clauses[label] = clause
      clause = []
      label = insn.name
    when BranchUnless
      body = [
        Assign(block_label.field, node_for(insn.label.name)),
        Next(Args([]))
      ]

      # The jump must be taken when the popped value is falsy, so this has
      # to be an `unless`; an `if` would invert the branch.
      clause << UnlessNode(clause.pop, Statements(body), nil)
    when Dup
      clause << clause.last
    when DupHash
      assocs =
        insn.object.map do |key, value|
          Assoc(node_for(key), node_for(value))
        end

      clause << HashLiteral(LBrace("{"), assocs)
    when GetGlobal
      clause << VarRef(GVar(insn.name.to_s))
    when GetLocalWC0
      local = iseq.local_table.locals[insn.index]
      clause << VarRef(Ident(local.name.to_s))
    when Jump
      clause << Assign(block_label.field, node_for(insn.label.name))
      clause << Next(Args([]))
    when Leave
      value = Args([clause.pop])
      # At the top level the generated code runs inside `loop`, so leaving
      # is expressed as a `break` rather than a `return`.
      clause << (iseq.type == :top ? Break(value) : ReturnNode(value))
    when OptAnd, OptDiv, OptEq, OptGE, OptGT, OptLE, OptLT, OptLTLT,
         OptMinus, OptMod, OptMult, OptOr, OptPlus
      left, right = clause.pop(2)
      clause << Binary(left, insn.calldata.method, right)
    when OptAref
      collection, arg = clause.pop(2)
      clause << ARef(collection, Args([arg]))
    when OptAset
      collection, arg, value = clause.pop(3)

      # `a[i] = a[i] + x` is folded back into `a[i] += x`.
      clause << if value.is_a?(Binary) && value.left.is_a?(ARef) &&
                   collection === value.left.collection &&
                   arg === value.left.index.parts[0]
        OpAssign(
          ARefField(collection, Args([arg])),
          Op("#{value.operator}="),
          value.right
        )
      else
        Assign(ARefField(collection, Args([arg])), value)
      end
    when OptNEq
      left, right = clause.pop(2)
      clause << Binary(left, :"!=", right)
    when OptSendWithoutBlock
      method = insn.calldata.method.to_s
      argc = insn.calldata.argc
      receiver, *arguments = clause.pop(argc + 1)

      # Only names such as `foo=` are attribute writers. Operator sends
      # like `===` also end in "=" but must stay ordinary calls.
      clause << if argc == 1 && method.match?(/[[:alnum:]_]=\z/)
        # The receiver is kept even for FCALL sends: a bare `foo = 1`
        # would be parsed as a local variable assignment.
        Assign(
          CallNode(receiver, Period("."), Ident(method[0..-2]), nil),
          arguments.first
        )
      elsif insn.calldata.flag?(CallData::CALL_FCALL)
        CallNode(
          nil,
          nil,
          Ident(method),
          argc == 0 ? Args([]) : ArgParen(Args(arguments))
        )
      elsif argc == 0
        CallNode(receiver, Period("."), Ident(method), nil)
      else
        CallNode(
          receiver,
          Period("."),
          Ident(method),
          ArgParen(Args(arguments))
        )
      end
    when PutObject
      case insn.object
      when Float
        clause << FloatLiteral(insn.object.inspect)
      when Integer
        clause << Int(insn.object.inspect)
      else
        raise "Unknown object type: #{insn.object.class.name}"
      end
    when PutObjectInt2Fix0
      clause << Int("0")
    when PutObjectInt2Fix1
      clause << Int("1")
    when PutSelf
      clause << VarRef(Kw("self"))
    when SetGlobal
      target = GVar(insn.name.to_s)
      value = clause.pop

      # `$a = $a + x` is folded back into `$a += x`.
      clause << if value.is_a?(Binary) && VarRef(target) === value.left
        OpAssign(VarField(target), Op("#{value.operator}="), value.right)
      else
        Assign(VarField(target), value)
      end
    when SetLocalWC0
      target = Ident(local_name(insn.index, 0))
      value = clause.pop

      # `a = a + x` is folded back into `a += x`.
      clause << if value.is_a?(Binary) && VarRef(target) === value.left
        OpAssign(VarField(target), Op("#{value.operator}="), value.right)
      else
        Assign(VarField(target), value)
      end
    else
      raise "Unknown instruction #{insn}"
    end
  end

  clauses[label] = clause

  # If there's only one clause, then we don't need a case statement, and
  # we can just emit that clause's nodes.
  return Statements(clauses.values.first) if clauses.size == 1

  # Build one `when` per label. They are chained back to front because
  # each When node holds a reference to the one that follows it.
  current = nil
  clauses.reverse_each do |current_label, current_clause|
    current =
      When(
        Args([node_for(current_label)]),
        Statements(current_clause),
        current
      )
  end
  switch = Case(Kw("case"), block_label.ref, current)

  # Every clause runs inside the `loop` block, so a local first assigned
  # there would be block-local and lost on the next iteration. Pre-assign
  # nil outside the loop for each local assigned in any clause.
  stack = []
  locals = [block_label.name]

  clauses.each_value do |nodes|
    nodes.each do |node|
      next unless node.is_a?(Assign) && node.target.is_a?(VarField) &&
                  node.target.value.is_a?(Ident)

      name = node.target.value.value
      next if locals.include?(name)

      stack << Assign(node.target, VarRef(Kw("nil")))
      locals << name
    end
  end

  # Finally, set the initial label and loop the entire case statement.
  stack << Assign(block_label.field, node_for(:label_0))
  stack << MethodAddBlock(
    CallNode(nil, nil, Ident("loop"), Args([])),
    BlockNode(
      Kw("do"),
      nil,
      BodyStmt(Statements([switch]), nil, nil, nil, nil)
    )
  )
  Statements(stack)
end
| 246 | + |
# Looks up the name of a local variable as a String.
#
# index:: position of the local within the owning scope's local table.
# level:: how many parent instruction sequences to climb, starting from
#         the decompiler's own instruction sequence (0 means that one).
def local_name(index, level)
  owner = (1..level).reduce(iseq) { |scope, _| scope.parent_iseq }
  owner.local_table.locals[index].name.to_s
end
| 252 | + end |
| 253 | + end |
| 254 | +end |
0 commit comments