From 9e09fd005663d6539c2b5570a3cb8c11bf23e311 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 7 Feb 2023 08:33:30 -0500 Subject: [PATCH] Sea of nodes optimizations and convenience functions --- lib/syntax_tree/yarv/control_flow_graph.rb | 4 + lib/syntax_tree/yarv/instruction_sequence.rb | 8 ++ lib/syntax_tree/yarv/sea_of_nodes.rb | 91 +++++++++++++++++--- test/yarv_test.rb | 52 +++++------ 4 files changed, 113 insertions(+), 42 deletions(-) diff --git a/lib/syntax_tree/yarv/control_flow_graph.rb b/lib/syntax_tree/yarv/control_flow_graph.rb index 1a361e5e..73d30208 100644 --- a/lib/syntax_tree/yarv/control_flow_graph.rb +++ b/lib/syntax_tree/yarv/control_flow_graph.rb @@ -203,6 +203,10 @@ def to_dfg DataFlowGraph.compile(self) end + def to_son + to_dfg.to_son + end + def to_mermaid output = StringIO.new output.puts("flowchart TD") diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 918a3c86..821738c9 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -273,6 +273,14 @@ def to_cfg ControlFlowGraph.compile(self) end + def to_dfg + to_cfg.to_dfg + end + + def to_son + to_dfg.to_son + end + def disasm fmt = Disassembler.new fmt.enqueue(self) diff --git a/lib/syntax_tree/yarv/sea_of_nodes.rb b/lib/syntax_tree/yarv/sea_of_nodes.rb index be027f39..fdf905a7 100644 --- a/lib/syntax_tree/yarv/sea_of_nodes.rb +++ b/lib/syntax_tree/yarv/sea_of_nodes.rb @@ -118,7 +118,8 @@ def compile connect_local_graphs_control(local_graphs) connect_local_graphs_data(local_graphs) - cleanup + cleanup_phi_nodes + cleanup_insn_nodes SeaOfNodes.new(dfg, nodes, local_graphs).tap(&:verify) end @@ -311,23 +312,13 @@ def connect_local_graphs_data(local_graphs) # We don't always build things in an optimal way. Go back and fix up # some mess we left. Ideally we wouldn't create these problems in the # first place. - def cleanup + def cleanup_phi_nodes nodes.dup.each do |node| # dup because we're mutating next unless node.is_a?(PhiNode) if node.inputs.size == 1 # Remove phi nodes with a single input. - node.inputs.each do |producer_edge| - node.outputs.each do |consumer_edge| - connect( - producer_edge.from, - consumer_edge.to, - producer_edge.type, - consumer_edge.label - ) - end - end - + connect_over(node) remove(node) elsif node.inputs.map(&:from).uniq.size == 1 # Remove phi nodes where all inputs are the same. @@ -344,6 +335,66 @@ def cleanup end end + # Eliminate as many unnecessary nodes as we can. + def cleanup_insn_nodes + nodes.dup.each do |node| + next unless node.is_a?(InsnNode) + + case node.insn + when AdjustStack + # If there are any inputs to the adjust stack that are immediately + # discarded, we can remove them from the input list. + number = node.insn.number + + node.inputs.dup.each do |input_edge| + next if input_edge.type != :data + + from = input_edge.from + next unless from.is_a?(InsnNode) + + if from.inputs.empty? && from.outputs.size == 1 + number -= 1 + remove(input_edge.from) + elsif from.insn.is_a?(Dup) + number -= 1 + connect_over(from) + remove(from) + + new_edge = node.inputs.last + new_edge.from.outputs.delete(new_edge) + node.inputs.delete(new_edge) + end + end + + if number == 0 + connect_over(node) + remove(node) + else + next_node = + if number == 1 + InsnNode.new(Pop.new, node.offset) + else + InsnNode.new(AdjustStack.new(number), node.offset) + end + + next_node.inputs.concat(node.inputs) + next_node.outputs.concat(node.outputs) + + # Dynamically finding the index of the node in the nodes array + # because we're mutating the array as we go. + nodes[nodes.index(node)] = next_node + end + when Jump + # When you have a jump instruction that only has one input and one + # output, you can just connect over top of it and remove it. + if node.inputs.size == 1 && node.outputs.size == 1 + connect_over(node) + remove(node) + end + end + end + end + # Connect one node to another. def connect(from, to, type, label = nil) raise if from == to @@ -354,6 +405,20 @@ def connect(from, to, type, label = nil) to.inputs << edge end + # Connect all of the inputs to all of the outputs of a node. + def connect_over(node) + node.inputs.each do |producer_edge| + node.outputs.each do |consumer_edge| + connect( + producer_edge.from, + consumer_edge.to, + producer_edge.type, + producer_edge.label + ) + end + end + end + # Remove a node from the graph. def remove(node) node.inputs.each do |producer_edge| diff --git a/test/yarv_test.rb b/test/yarv_test.rb index e6a3adda..a1e89568 100644 --- a/test/yarv_test.rb +++ b/test/yarv_test.rb @@ -302,7 +302,7 @@ def test_cfg iseq = SyntaxTree::YARV::InstructionSequence.from(iseq.to_a) cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) - assert_equal(<<~CFG, cfg.disasm) + assert_equal(<<~DISASM, cfg.disasm) == cfg: #@:1 (1,0)-(1,0)> block_0 0000 putobject 100 @@ -325,7 +325,7 @@ def test_cfg 0014 opt_plus 0016 leave == to: leaves - CFG + DISASM end def test_dfg @@ -334,7 +334,7 @@ def test_dfg cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq) dfg = SyntaxTree::YARV::DataFlowGraph.compile(cfg) - assert_equal(<<~DFG, dfg.disasm) + assert_equal(<<~DISASM, dfg.disasm) == dfg: #@:1 (1,0)-(1,0)> block_0 0000 putobject 100 # out: out_0 @@ -363,7 +363,7 @@ def test_dfg 0014 opt_plus # in: in_0, in_1; out: 16 0016 leave # in: 14 == to: leaves - DFG + DISASM end def test_son @@ -373,14 +373,13 @@ def test_son dfg = SyntaxTree::YARV::DataFlowGraph.compile(cfg) son = SyntaxTree::YARV::SeaOfNodes.compile(dfg) - assert_equal(<<~SON, son.to_mermaid) + assert_equal(<<~MERMAID, son.to_mermaid) flowchart TD node_0("0000 putobject 14") node_2("0002 putobject_INT2FIX_0_") node_3("0003 opt_lt <calldata!mid:<, argc:1, ARGS_SIMPLE>") node_5("0005 branchunless 0011") node_7("0007 putobject -1") - node_9("0009 jump 0012") node_11("0011 putobject_INT2FIX_1_") node_12("0012 putobject 100") node_14("0014 opt_plus <calldata!mid:+, argc:1, ARGS_SIMPLE>") @@ -397,28 +396,26 @@ def test_son linkStyle 3 stroke:green; node_5 --> |branch0| node_11 linkStyle 4 stroke:red; - node_5 --> |fallthrough| node_9 + node_5 --> |fallthrough| node_1000 linkStyle 5 stroke:red; node_7 --> |0009| node_1001 linkStyle 6 stroke:green; - node_9 --> |branch0| node_1000 - linkStyle 7 stroke:red; node_11 --> |branch0| node_1000 - linkStyle 8 stroke:red; + linkStyle 7 stroke:red; node_11 --> |0011| node_1001 - linkStyle 9 stroke:green; + linkStyle 8 stroke:green; node_12 --> |1| node_14 - linkStyle 10 stroke:green; + linkStyle 9 stroke:green; node_14 --> node_16 - linkStyle 11 stroke:red; + linkStyle 10 stroke:red; node_14 --> |0| node_16 - linkStyle 12 stroke:green; + linkStyle 11 stroke:green; node_1000 --> node_14 - linkStyle 13 stroke:red; + linkStyle 12 stroke:red; node_1001 -.-> node_1000 node_1001 --> |0| node_14 - linkStyle 15 stroke:green; - SON + linkStyle 14 stroke:green; + MERMAID end def test_son_indirect_basic_block_argument @@ -428,7 +425,7 @@ def test_son_indirect_basic_block_argument dfg = SyntaxTree::YARV::DataFlowGraph.compile(cfg) son = SyntaxTree::YARV::SeaOfNodes.compile(dfg) - assert_equal(<<~SON, son.to_mermaid) + assert_equal(<<~MERMAID, son.to_mermaid) flowchart TD node_0("0000 putobject 100") node_2("0002 putobject 14") @@ -436,7 +433,6 @@ def test_son_indirect_basic_block_argument node_5("0005 opt_lt <calldata!mid:<, argc:1, ARGS_SIMPLE>") node_7("0007 branchunless 0013") node_9("0009 putobject -1") - node_11("0011 jump 0014") node_13("0013 putobject_INT2FIX_1_") node_14("0014 opt_plus <calldata!mid:+, argc:1, ARGS_SIMPLE>") node_16("0016 leave") @@ -454,26 +450,24 @@ def test_son_indirect_basic_block_argument linkStyle 4 stroke:green; node_7 --> |branch0| node_13 linkStyle 5 stroke:red; - node_7 --> |fallthrough| node_11 + node_7 --> |fallthrough| node_1002 linkStyle 6 stroke:red; node_9 --> |0011| node_1004 linkStyle 7 stroke:green; - node_11 --> |branch0| node_1002 - linkStyle 8 stroke:red; node_13 --> |branch0| node_1002 - linkStyle 9 stroke:red; + linkStyle 8 stroke:red; node_13 --> |0013| node_1004 - linkStyle 10 stroke:green; + linkStyle 9 stroke:green; node_14 --> node_16 - linkStyle 11 stroke:red; + linkStyle 10 stroke:red; node_14 --> |0| node_16 - linkStyle 12 stroke:green; + linkStyle 11 stroke:green; node_1002 --> node_14 - linkStyle 13 stroke:red; + linkStyle 12 stroke:red; node_1004 -.-> node_1002 node_1004 --> |1| node_14 - linkStyle 15 stroke:green; - SON + linkStyle 14 stroke:green; + MERMAID end private