1
1
from __future__ import absolute_import , division , unicode_literals
2
2
3
+ import collections
3
4
import os
4
5
import sys
5
6
import codecs
6
7
import glob
7
8
import xml .sax .handler
8
9
10
+ from six import text_type
11
+ from six .moves import range
12
+
13
+ try :
14
+ from collections import OrderedDict
15
+ except ImportError :
16
+ from ordereddict import OrderedDict
17
+
9
18
base_path = os .path .split (__file__ )[0 ]
10
19
11
20
test_dir = os .path .join (base_path , 'testdata' )
14
23
os .path .pardir )))
15
24
16
25
from html5lib import treebuilders
26
+ from html5lib .utils import py2_str_unicode
17
27
del base_path
18
28
19
29
# Build a dict of available trees
@@ -47,6 +57,17 @@ def get_data_files(subdirectory, files='*.dat'):
47
57
return sorted (glob .glob (os .path .join (test_dir , subdirectory , files )))
48
58
49
59
60
def isSubsequence(l1, l2):
    """Check whether l2 is a subsequence of l1.

    l2 is a subsequence of l1 when every element of l2 appears in l1 in
    the same relative order, not necessarily contiguously.  An empty l2
    is a subsequence of any l1, including an empty one.

    Returns True or False.
    """
    # A single shared iterator over l1 is consumed left to right: each
    # membership test resumes where the previous match stopped, which is
    # what enforces the ordering requirement.
    remaining = iter(l1)
    return all(item in remaining for item in l2)
69
+
70
+
50
71
class DefaultDict (dict ):
51
72
def __init__ (self , default , * args , ** kwargs ):
52
73
self .default = default
@@ -65,6 +86,9 @@ def __init__(self, filename, newTestHeading="data", encoding="utf8"):
65
86
self .encoding = encoding
66
87
self .newTestHeading = newTestHeading
67
88
89
+ def __del__ (self ):
90
+ self .f .close ()
91
+
68
92
def __iter__ (self ):
69
93
data = DefaultDict (None )
70
94
key = None
@@ -100,6 +124,120 @@ def normaliseOutput(self, data):
100
124
return data
101
125
102
126
127
def _getTestData(isUnicode):
    """Build a test-data reader class for text or for byte input.

    isUnicode selects which literal type (text when true, bytes when
    false) is used for the heading marker, the empty section value and
    the newline checks, so that the reader never mixes the two types.

    Returns the reader class itself, not an instance.
    """
    if isUnicode:
        marker, empty, newline = "#", "", "\n"
    else:
        marker, empty, newline = b"#", b"", b"\n"

    class _TestData2(object):
        """Iterate over the tests contained in an iterable of lines.

        f is any iterable yielding lines; headings is the ordered list of
        expected headings, whose first entry marks the start of a test.
        """

        def __init__(self, f, headings):
            self.f = f
            self.headings = headings

        def __iter__(self):
            """Yield one test object per group of sections found in f."""
            newTestHeading = self.headings[0]

            data = OrderedDict()
            key = None
            for line in self.f:
                if line.startswith(marker):
                    heading = line[1:].strip()
                    if data and heading == newTestHeading:
                        # The first heading showing up again closes the
                        # test collected so far.
                        self._normalize_newlines(data, last=False)
                        yield self._makeTest(data)
                        data = OrderedDict()
                    key = heading
                    data[key] = empty
                elif key is not None:
                    # Lines before the very first heading are dropped.
                    data[key] += line
            if data:
                self._normalize_newlines(data, last=True)
                yield self._makeTest(data)

        def _makeTest(self, data):
            """Wrap data in the test class matching the input type."""
            # Resolved at call time: both classes are module globals that
            # need not exist yet when this factory runs.
            cls = TestUnicode if isUnicode else TestBytes
            return cls(self.headings, data)

        def _normalize_newlines(self, data, last):
            """Strip the line terminators left on every section, in place.

            Each non-empty section loses its trailing newline.  Unless
            this is the last test (last is true), the final section also
            loses the blank line that separated it from the next test.

            Raises AssertionError when a section does not end as expected.
            """
            final_key = final_value = None
            for key, value in list(data.items()):
                if value:
                    # Compare slices, not single items: indexing bytes
                    # yields an int on Python 3.
                    assert value[-1:] == newline
                    data[key] = value[:-1]
                final_key, final_value = key, value

            if final_value and not last:
                assert final_value[-2:] == newline * 2
                data[final_key] = final_value[:-2]

    return _TestData2


TestDataUnicode = _getTestData(True)
TestDataBytes = _getTestData(False)
166
+
167
+
168
class Test(object):
    """Read-only, mapping-like view of one test, keyed by heading.

    headings is the ordered list of headings a test may contain and d
    maps the headings actually present to their content.  Headings that
    are expected but absent from d are exposed with the value None.

    Raises ValueError if headings contains duplicates or if the keys of
    d are not a subsequence of headings.
    """

    def __init__(self, headings, d):
        if len(headings) != len(set(headings)):
            raise ValueError("headings must not contain duplicates")
        if not isSubsequence(headings, list(d.keys())):
            raise ValueError("test headings must be a subsequence of expected headings, got %s, expected %s" % (list(d.keys()), headings))
        if len(d) == len(headings):
            # Every expected heading is present: keep the mapping as is.
            self._d = d
        else:
            # Fill the gaps with None, in the expected heading order.
            self._d = OrderedDict((heading, d.get(heading)) for heading in headings)

    def __getitem__(self, k):
        return self._d[k]

    def __len__(self):
        return len(self._d)

    def __iter__(self):
        return iter(self._d)

    def __contains__(self, k):
        return k in self._d

    def keys(self):
        return self._d.keys()

    def items(self):
        return self._d.items()

    def values(self):
        return self._d.values()

    def get(self, k, d=None):
        return self._d.get(k, d)

    def __eq__(self, o):
        # Defer to the other operand instead of failing on a missing
        # ``_d`` attribute when compared with an unrelated object.
        if not isinstance(o, Test):
            return NotImplemented
        return self._d == o._d

    def __ne__(self, o):
        if not isinstance(o, Test):
            return NotImplemented
        return self._d != o._d

    def __hash__(self):
        # The items view/list itself is not hashable.  A frozenset of the
        # pairs is, and it ignores ordering, so two instances that compare
        # equal always hash equal whatever mapping type backs them.
        return hash(frozenset(self._d.items()))
217
+
218
+
219
@py2_str_unicode
class TestBytes(Test):
    """A test whose headings and contents are byte strings."""

    def __bytes__(self):
        """Serialise the test as ``#heading`` / content lines.

        Each heading that has content contributes a ``#heading`` line
        followed by its content; the result ends with a newline.
        Headings whose content is None (expected but absent from the
        test) are left out, since None cannot be joined into bytes.
        """
        r = []
        for heading, content in self.items():
            if content is None:
                continue
            r.append(b"#" + heading)
            r.append(content)
        # A final empty element makes the joined output end in a newline.
        r.append(b"")
        return b"\n".join(r)
228
+
229
+
230
@py2_str_unicode
class TestUnicode(Test):
    """A test whose headings and contents are text strings."""

    def __unicode__(self):
        """Serialise the test as ``#heading`` / content lines.

        Each heading that has content contributes a ``#heading`` line
        followed by its content; the result ends with a newline.
        Headings whose content is None (expected but absent from the
        test) are left out, since None cannot be joined into text.
        """
        r = []
        for heading, content in self.items():
            if content is None:
                continue
            r.append("#" + heading)
            r.append(content)
        # A final empty element makes the joined output end in a newline.
        r.append("")
        return "\n".join(r)
239
+
240
+
103
241
def convert (stripChars ):
104
242
def convertData (data ):
105
243
"""convert the output of str(document) to the format used in the testcases"""
0 commit comments