Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit da19f6a

Browse files
committed
Location information for parser nodes
1 parent e0be579 commit da19f6a

File tree

6 files changed

+774
-966
lines changed

6 files changed

+774
-966
lines changed

lib/syntax_tree/formatter.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ def format(node, stackable: true)
138138
# going to just print out the node as it was seen in the source.
139139
doc =
140140
if last_leading&.ignore?
141-
range = source[node.location.start_char...node.location.end_char]
141+
range = source[node.start_char...node.end_char]
142142
first = true
143143

144144
range.each_line(chomp: true) do |line|

lib/syntax_tree/node.rb

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,14 @@ def format(q)
126126
raise NotImplementedError
127127
end
128128

129+
def start_char
130+
location.start_char
131+
end
132+
133+
def end_char
134+
location.end_char
135+
end
136+
129137
def pretty_print(q)
130138
accept(Visitor::PrettyPrintVisitor.new(q))
131139
end

lib/syntax_tree/parser.rb

Lines changed: 141 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -256,11 +256,37 @@ def find_token(type)
256256
tokens[index] if index
257257
end
258258

259+
def find_token_between(type, left, right)
260+
bounds = left.location.end_char...right.location.start_char
261+
index =
262+
tokens.rindex do |token|
263+
char = token.location.start_char
264+
break if char < bounds.begin
265+
266+
token.is_a?(type) && bounds.cover?(char)
267+
end
268+
269+
tokens[index] if index
270+
end
271+
259272
def find_keyword(name)
260273
index = tokens.rindex { |token| token.is_a?(Kw) && (token.name == name) }
261274
tokens[index] if index
262275
end
263276

277+
def find_keyword_between(name, left, right)
278+
bounds = left.location.end_char...right.location.start_char
279+
index =
280+
tokens.rindex do |token|
281+
char = token.location.start_char
282+
break if char < bounds.begin
283+
284+
token.is_a?(Kw) && (token.name == name) && bounds.cover?(char)
285+
end
286+
287+
tokens[index] if index
288+
end
289+
264290
def find_operator(name)
265291
index = tokens.rindex { |token| token.is_a?(Op) && (token.name == name) }
266292
tokens[index] if index
@@ -645,7 +671,7 @@ def visit_var_ref(node)
645671
end
646672

647673
def self.visit(node, tokens)
648-
start_char = node.location.start_char
674+
start_char = node.start_char
649675
allocated = []
650676

651677
tokens.reverse_each do |token|
@@ -874,13 +900,34 @@ def on_binary(left, operator, right)
874900
# on_block_var: (Params params, (nil | Array[Ident]) locals) -> BlockVar
875901
def on_block_var(params, locals)
876902
index =
877-
tokens.rindex do |node|
878-
node.is_a?(Op) && %w[| ||].include?(node.value) &&
879-
node.location.start_char < params.location.start_char
880-
end
903+
tokens.rindex { |node| node.is_a?(Op) && %w[| ||].include?(node.value) }
904+
905+
ending = tokens.delete_at(index)
906+
beginning = ending.value == "||" ? ending : consume_operator(:|)
907+
908+
# If there are no parameters, then we didn't have anything to base the
909+
# location information off of. Now that we have an opening of the
910+
# block, we can correct this.
911+
if params.empty?
912+
start_line = params.location.start_line
913+
start_char =
914+
(
915+
if beginning.value == "||"
916+
beginning.location.start_char
917+
else
918+
find_next_statement_start(beginning.location.end_char)
919+
end
920+
)
921+
922+
location =
923+
Location.fixed(
924+
line: start_line,
925+
char: start_char,
926+
column: start_char - line_counts[start_line - 1].start
927+
)
881928

882-
beginning = tokens[index]
883-
ending = tokens[-1]
929+
params = params.copy(location: location)
930+
end
884931

885932
BlockVar.new(
886933
params: params,
@@ -1762,15 +1809,13 @@ def on_for(index, collection, statements)
17621809

17631810
# Consume the do keyword if it exists so that it doesn't get confused for
17641811
# some other block
1765-
keyword = find_keyword(:do)
1766-
if keyword &&
1767-
keyword.location.start_char > collection.location.end_char &&
1768-
keyword.location.end_char < ending.location.start_char
1812+
if (keyword = find_keyword_between(:do, collection, ending))
17691813
tokens.delete(keyword)
17701814
end
17711815

17721816
start_char =
17731817
find_next_statement_start((keyword || collection).location.end_char)
1818+
17741819
statements.bind(
17751820
start_char,
17761821
start_char -
@@ -1984,7 +2029,12 @@ def on_if(predicate, statements, consequent)
19842029
beginning = consume_keyword(:if)
19852030
ending = consequent || consume_keyword(:end)
19862031

1987-
start_char = find_next_statement_start(predicate.location.end_char)
2032+
if (keyword = find_keyword_between(:then, predicate, ending))
2033+
tokens.delete(keyword)
2034+
end
2035+
2036+
start_char =
2037+
find_next_statement_start((keyword || predicate).location.end_char)
19882038
statements.bind(
19892039
start_char,
19902040
start_char - line_counts[predicate.location.end_line - 1].start,
@@ -2068,7 +2118,8 @@ def on_in(pattern, statements, consequent)
20682118
statements_start = token
20692119
end
20702120

2071-
start_char = find_next_statement_start(statements_start.location.end_char)
2121+
start_char =
2122+
find_next_statement_start((token || statements_start).location.end_char)
20722123
statements.bind(
20732124
start_char,
20742125
start_char -
@@ -2194,12 +2245,19 @@ def on_lambda(params, statements)
21942245
token.location.start_char > beginning.location.start_char
21952246
end
21962247

2248+
if braces
2249+
opening = consume_token(TLamBeg)
2250+
closing = consume_token(RBrace)
2251+
else
2252+
opening = consume_keyword(:do)
2253+
closing = consume_keyword(:end)
2254+
end
2255+
21972256
# We need to do some special mapping here. Since ripper doesn't support
2198-
# capturing lambda var until 3.2, we need to normalize all of that here.
2257+
# capturing lambda vars, we need to normalize all of that here.
21992258
params =
2200-
case params
2201-
when Paren
2202-
# In this case we've gotten to the <3.2 parentheses wrapping a set of
2259+
if params.is_a?(Paren)
2260+
# In this case we've gotten to the parentheses wrapping a set of
22032261
# parameters case. Here we need to manually scan for lambda locals.
22042262
range = (params.location.start_char + 1)...params.location.end_char
22052263
locals = lambda_locals(source[range])
@@ -2221,25 +2279,28 @@ def on_lambda(params, statements)
22212279

22222280
node.comments.concat(params.comments)
22232281
node
2224-
when Params
2225-
# In this case we've gotten to the <3.2 plain set of parameters. In
2226-
# this case there cannot be lambda locals, so we will wrap the
2227-
# parameters into a lambda var that has no locals.
2282+
else
2283+
# If there are no parameters, then we didn't have anything to base the
2284+
# location information off of. Now that we have an opening of the
2285+
# block, we can correct this.
2286+
if params.empty?
2287+
opening_location = opening.location
2288+
location =
2289+
Location.fixed(
2290+
line: opening_location.start_line,
2291+
char: opening_location.start_char,
2292+
column: opening_location.start_column
2293+
)
2294+
2295+
params = params.copy(location: location)
2296+
end
2297+
2298+
# In this case we've gotten to the plain set of parameters. In this
2299+
# case there cannot be lambda locals, so we will wrap the parameters
2300+
# into a lambda var that has no locals.
22282301
LambdaVar.new(params: params, locals: [], location: params.location)
2229-
when LambdaVar
2230-
# In this case we've gotten to 3.2+ lambda var. In this case we don't
2231-
# need to do anything and can just the value as given.
2232-
params
22332302
end
22342303

2235-
if braces
2236-
opening = consume_token(TLamBeg)
2237-
closing = consume_token(RBrace)
2238-
else
2239-
opening = consume_keyword(:do)
2240-
closing = consume_keyword(:end)
2241-
end
2242-
22432304
start_char = find_next_statement_start(opening.location.end_char)
22442305
statements.bind(
22452306
start_char,
@@ -3134,7 +3195,7 @@ def on_rescue(exceptions, variable, statements, consequent)
31343195
exceptions = exceptions[0] if exceptions.is_a?(Array)
31353196

31363197
last_node = variable || exceptions || keyword
3137-
start_char = find_next_statement_start(last_node.location.end_char)
3198+
start_char = find_next_statement_start(last_node.end_char)
31383199
statements.bind(
31393200
start_char,
31403201
start_char - line_counts[last_node.location.start_line - 1].start,
@@ -3156,7 +3217,7 @@ def on_rescue(exceptions, variable, statements, consequent)
31563217
start_char: keyword.location.end_char + 1,
31573218
start_column: keyword.location.end_column + 1,
31583219
end_line: last_node.location.end_line,
3159-
end_char: last_node.location.end_char,
3220+
end_char: last_node.end_char,
31603221
end_column: last_node.location.end_column
31613222
)
31623223
)
@@ -3267,9 +3328,27 @@ def on_sclass(target, bodystmt)
32673328
)
32683329
end
32693330

3270-
# def on_semicolon(value)
3271-
# value
3272-
# end
3331+
class Semicolon
3332+
attr_reader :location
3333+
3334+
def initialize(location:)
3335+
@location = location
3336+
end
3337+
end
3338+
3339+
# :call-seq:
3340+
# on_semicolon: (String value) -> Semicolon
3341+
def on_semicolon(value)
3342+
tokens << Semicolon.new(
3343+
location:
3344+
Location.token(
3345+
line: lineno,
3346+
char: char_pos,
3347+
column: current_column,
3348+
size: value.size
3349+
)
3350+
)
3351+
end
32733352

32743353
# def on_sp(value)
32753354
# value
@@ -3706,7 +3785,12 @@ def on_unless(predicate, statements, consequent)
37063785
beginning = consume_keyword(:unless)
37073786
ending = consequent || consume_keyword(:end)
37083787

3709-
start_char = find_next_statement_start(predicate.location.end_char)
3788+
if (keyword = find_keyword_between(:then, predicate, ending))
3789+
tokens.delete(keyword)
3790+
end
3791+
3792+
start_char =
3793+
find_next_statement_start((keyword || predicate).location.end_char)
37103794
statements.bind(
37113795
start_char,
37123796
start_char - line_counts[predicate.location.end_line - 1].start,
@@ -3742,16 +3826,16 @@ def on_until(predicate, statements)
37423826
beginning = consume_keyword(:until)
37433827
ending = consume_keyword(:end)
37443828

3745-
# Consume the do keyword if it exists so that it doesn't get confused for
3746-
# some other block
3747-
keyword = find_keyword(:do)
3748-
if keyword && keyword.location.start_char > predicate.location.end_char &&
3749-
keyword.location.end_char < ending.location.start_char
3750-
tokens.delete(keyword)
3751-
end
3829+
delimiter =
3830+
find_keyword_between(:do, predicate, statements) ||
3831+
find_token_between(Semicolon, predicate, statements)
3832+
3833+
tokens.delete(delimiter) if delimiter
37523834

37533835
# Update the Statements location information
3754-
start_char = find_next_statement_start(predicate.location.end_char)
3836+
start_char =
3837+
find_next_statement_start((delimiter || predicate).location.end_char)
3838+
37553839
statements.bind(
37563840
start_char,
37573841
start_char - line_counts[predicate.location.end_line - 1].start,
@@ -3845,7 +3929,8 @@ def on_when(arguments, statements, consequent)
38453929
statements_start = token
38463930
end
38473931

3848-
start_char = find_next_statement_start(statements_start.location.end_char)
3932+
start_char =
3933+
find_next_statement_start((token || statements_start).location.end_char)
38493934

38503935
statements.bind(
38513936
start_char,
@@ -3869,16 +3954,16 @@ def on_while(predicate, statements)
38693954
beginning = consume_keyword(:while)
38703955
ending = consume_keyword(:end)
38713956

3872-
# Consume the do keyword if it exists so that it doesn't get confused for
3873-
# some other block
3874-
keyword = find_keyword(:do)
3875-
if keyword && keyword.location.start_char > predicate.location.end_char &&
3876-
keyword.location.end_char < ending.location.start_char
3877-
tokens.delete(keyword)
3878-
end
3957+
delimiter =
3958+
find_keyword_between(:do, predicate, statements) ||
3959+
find_token_between(Semicolon, predicate, statements)
3960+
3961+
tokens.delete(delimiter) if delimiter
38793962

38803963
# Update the Statements location information
3881-
start_char = find_next_statement_start(predicate.location.end_char)
3964+
start_char =
3965+
find_next_statement_start((delimiter || predicate).location.end_char)
3966+
38823967
statements.bind(
38833968
start_char,
38843969
start_char - line_counts[predicate.location.end_line - 1].start,

0 commit comments

Comments
 (0)