@@ -14,93 +14,6 @@ module YARV
14
14
# cfg = SyntaxTree::YARV::ControlFlowGraph.compile(iseq)
15
15
#
16
16
class ControlFlowGraph
17
- # This is the instruction sequence that this control flow graph
18
- # corresponds to.
19
- attr_reader :iseq
20
-
21
- # This is the list of instructions that this control flow graph contains.
22
- # It is effectively the same as the list of instructions in the
23
- # instruction sequence but with line numbers and events filtered out.
24
- attr_reader :insns
25
-
26
- # This is the set of basic blocks that this control-flow graph contains.
27
- attr_reader :blocks
28
-
29
- def initialize ( iseq , insns , blocks )
30
- @iseq = iseq
31
- @insns = insns
32
- @blocks = blocks
33
- end
34
-
35
- def disasm
36
- fmt = Disassembler . new ( iseq )
37
- fmt . puts ( "== cfg: #{ iseq . inspect } " )
38
-
39
- blocks . each do |block |
40
- fmt . puts ( block . id )
41
- fmt . with_prefix ( " " ) do |prefix |
42
- unless block . incoming_blocks . empty?
43
- from = block . incoming_blocks . map ( &:id )
44
- fmt . puts ( "#{ prefix } == from: #{ from . join ( ", " ) } " )
45
- end
46
-
47
- fmt . format_insns! ( block . insns , block . block_start )
48
-
49
- to = block . outgoing_blocks . map ( &:id )
50
- to << "leaves" if block . insns . last . leaves?
51
- fmt . puts ( "#{ prefix } == to: #{ to . join ( ", " ) } " )
52
- end
53
- end
54
-
55
- fmt . string
56
- end
57
-
58
- def to_mermaid
59
- output = StringIO . new
60
- output . puts ( "flowchart TD" )
61
-
62
- fmt = Disassembler ::Mermaid . new
63
- blocks . each do |block |
64
- output . puts ( " subgraph #{ block . id } " )
65
- previous = nil
66
-
67
- block . each_with_length do |insn , length |
68
- node_id = "node_#{ length } "
69
- label = "%04d %s" % [ length , insn . disasm ( fmt ) ]
70
-
71
- output . puts ( " #{ node_id } (\" #{ CGI . escapeHTML ( label ) } \" )" )
72
- output . puts ( " #{ previous } --> #{ node_id } " ) if previous
73
-
74
- previous = node_id
75
- end
76
-
77
- output . puts ( " end" )
78
- end
79
-
80
- blocks . each do |block |
81
- block . outgoing_blocks . each do |outgoing |
82
- offset =
83
- block . block_start + block . insns . sum ( &:length ) -
84
- block . insns . last . length
85
-
86
- output . puts ( " node_#{ offset } --> node_#{ outgoing . block_start } " )
87
- end
88
- end
89
-
90
- output . string
91
- end
92
-
93
- # This method is used to verify that the control flow graph is well
94
- # formed. It does this by checking that each basic block is itself well
95
- # formed.
96
- def verify
97
- blocks . each ( &:verify )
98
- end
99
-
100
- def self . compile ( iseq )
101
- Compiler . new ( iseq ) . compile
102
- end
103
-
104
17
# This class is responsible for creating a control flow graph from the
105
18
# given instruction sequence.
106
19
class Compiler
@@ -139,7 +52,11 @@ def initialize(iseq)
139
52
# Builds the control flow graph for the instruction sequence held by this
# compiler. The basic blocks are built first, then connected to each other,
# then pruned; both of the latter steps work on the same hash of blocks.
# The resulting ControlFlowGraph is verified before it is returned.
def compile
  build_basic_blocks.then do |blocks|
    connect_basic_blocks(blocks)
    prune_basic_blocks(blocks)

    graph = ControlFlowGraph.new(iseq, insns, blocks.values)
    graph.verify
    graph
  end
end
145
62
@@ -187,7 +104,16 @@ def build_basic_blocks
187
104
188
105
block_starts
189
106
. zip ( blocks )
190
- . to_h do |block_start , block_insns |
107
+ . to_h do |block_start , insns |
108
+ # It's possible that we have not detected a block start but still
109
+ # have branching instructions inside of a basic block. This can
110
+ # happen if you have an unconditional jump which is followed by
111
+ # instructions that are unreachable. As of Ruby 3.2, this is
112
+ # possible with something as simple as "1 => a". In this case we
113
+ # can discard all instructions that follow branching instructions.
114
+ block_insns =
115
+ insns . slice_after { |insn | insn . branch_targets . any? } . first
116
+
191
117
[ block_start , BasicBlock . new ( block_start , block_insns ) ]
192
118
end
193
119
end
@@ -213,6 +139,114 @@ def connect_basic_blocks(blocks)
213
139
end
214
140
end
215
141
end
142
+
143
# Removes every basic block that cannot be reached from the entry block (the
# block keyed by offset 0) by following outgoing edges. The given hash is
# mutated in place.
#
# Pruned blocks are also dropped from the incoming edge lists of the blocks
# that remain, so that no surviving block reports a predecessor that is no
# longer part of the graph.
#
# Raises KeyError if there is no block starting at offset 0. Returns the
# (possibly smaller) hash of blocks.
def prune_basic_blocks(blocks)
  reachable = Set.new
  worklist = [blocks.fetch(0)]

  until worklist.empty?
    current_block = worklist.shift

    # Set#add? returns nil when the block has already been visited.
    next unless reachable.add?(current_block)

    worklist.concat(current_block.outgoing_blocks)
  end

  blocks.select! { |_, block| reachable.include?(block) }

  # An unreachable block may still have had an edge into a reachable one, so
  # filter those dangling predecessors out of the surviving blocks.
  # NOTE(review): assumes incoming_blocks is a mutable collection that
  # responds to select! (e.g. an Array) - confirm against BasicBlock.
  blocks.each_value do |block|
    block.incoming_blocks.select! { |incoming| reachable.include?(incoming) }
  end

  blocks
end
159
+ end
160
+
161
+ # This is the instruction sequence that this control flow graph
162
+ # corresponds to.
163
+ attr_reader :iseq
164
+
165
+ # This is the list of instructions that this control flow graph contains.
166
+ # It is effectively the same as the list of instructions in the
167
+ # instruction sequence but with line numbers and events filtered out.
168
+ attr_reader :insns
169
+
170
+ # This is the set of basic blocks that this control-flow graph contains.
171
+ attr_reader :blocks
172
+
173
# Stores the instruction sequence, the instruction list, and the basic
# blocks so that they are available through the corresponding readers.
def initialize(iseq, insns, blocks)
  @iseq, @insns, @blocks = iseq, insns, blocks
end
178
+
179
# Produces a textual listing of this control flow graph and returns it as
# the string accumulated by the disassembler. For each basic block it
# prints the block id, the ids of the incoming blocks (only when there are
# any), the block's instructions, and the ids of the outgoing blocks. The
# word "leaves" is appended to the outgoing list when the block's final
# instruction reports that it leaves.
def disasm
  formatter = Disassembler.new(iseq)
  formatter.puts("== cfg: #{iseq.inspect} ")

  blocks.each do |basic_block|
    formatter.puts(basic_block.id)

    formatter.with_prefix(" ") do |indent|
      sources = basic_block.incoming_blocks
      formatter.puts("#{indent} == from: #{sources.map(&:id).join(", ")} ") unless sources.empty?

      formatter.format_insns!(basic_block.insns, basic_block.block_start)

      targets = basic_block.outgoing_blocks.map(&:id)
      targets.push("leaves") if basic_block.insns.last.leaves?
      formatter.puts("#{indent} == to: #{targets.join(", ")} ")
    end
  end

  formatter.string
end
201
+
202
# Hands this control flow graph to DataFlowGraph.compile and returns
# whatever that call produces.
def to_dfg
  control_flow_graph = self
  DataFlowGraph.compile(control_flow_graph)
end
205
+
206
# Renders this control flow graph as Mermaid flowchart source and returns it
# as a string. Every basic block becomes a subgraph whose nodes are its
# instructions, chained in order; afterwards an edge is emitted from the
# last instruction of each block to the first instruction of each of its
# outgoing blocks.
def to_mermaid
  buffer = StringIO.new
  buffer.puts("flowchart TD")

  formatter = Disassembler::Mermaid.new
  blocks.each do |basic_block|
    buffer.puts(" subgraph #{basic_block.id} ")
    last_node = nil

    basic_block.each_with_length do |insn, offset|
      current_node = "node_#{offset} "
      text = format("%04d %s", offset, insn.disasm(formatter))

      buffer.puts(" #{current_node} (\" #{CGI.escapeHTML(text)} \" )")
      buffer.puts(" #{last_node} --> #{current_node} ") if last_node

      last_node = current_node
    end

    buffer.puts(" end")
  end

  blocks.each do |source|
    source.outgoing_blocks.each do |target|
      # The final instruction starts at the block start plus the combined
      # length of everything before it.
      insns = source.insns
      total_length = insns.sum(&:length)
      final_length = insns.last.length
      final_offset = source.block_start + total_length - final_length

      buffer.puts(" node_#{final_offset} --> node_#{target.block_start} ")
    end
  end

  buffer.string
end
240
+
241
# Checks that this control flow graph is well formed by asking every basic
# block in turn to verify itself.
def verify
  blocks.each { |basic_block| basic_block.verify }
end
247
+
248
# Convenience entry point: creates a Compiler for the given instruction
# sequence and returns the result of compiling it.
def self.compile(iseq)
  compiler = Compiler.new(iseq)
  compiler.compile
end
217
251
end
218
252
end
0 commit comments