@@ -198,18 +198,26 @@ class State(IntEnum):
198
198
PARSING_ROWS = auto ()
199
199
FINISHED = auto ()
200
200
201
- outer_border_pattern = r'^(\s*)=+( +=+)+$'
201
+ outer_border_pattern : str
202
+ column_top_prefix : str
203
+ column_top_border : str
204
+ column_end_offset : int
202
205
203
206
_state : int
204
207
_column_starts : List [int ]
208
+ _columns_end : int
205
209
_columns : List [str ]
206
210
_rows : List [List [str ]]
207
211
_max_sizes : List [int ]
208
212
_indent : str
209
213
214
+ def __init__ (self ):
215
+ self ._reset_state ()
216
+
210
217
def _reset_state (self ):
211
218
self ._state = TableParser .State .AWAITS
212
219
self ._column_starts = []
220
+ self ._columns_end = - 1
213
221
self ._columns = []
214
222
self ._rows = []
215
223
self ._max_sizes = []
@@ -222,11 +230,13 @@ def initiate_parsing(self, line: str, current_language: str) -> IBlockBeginning:
222
230
self ._reset_state ()
223
231
match = re .match (self .outer_border_pattern , line )
224
232
assert match
225
- self ._indent = match .group (1 ) or ''
233
+ groups = match .groupdict ()
234
+ self ._indent = groups ['indent' ] or ''
226
235
self ._column_starts = []
227
- previous = ' '
236
+ self ._columns_end = match .end ('column' )
237
+ previous = self .column_top_prefix
228
238
for i , char in enumerate (line ):
229
- if char == '=' and previous == ' ' :
239
+ if char == self . column_top_border and previous == self . column_top_prefix :
230
240
self ._column_starts .append (i )
231
241
previous = char
232
242
self ._max_sizes = [0 for i in self ._column_starts ]
@@ -245,17 +255,24 @@ def consume(self, line: str) -> None:
245
255
# TODO: check integrity?
246
256
self ._state += 1
247
257
elif self ._state == states .PARSING_ROWS :
248
- match = re .match (self .outer_border_pattern , line )
249
- if match :
250
- self ._state += 1
251
- else :
252
- self ._rows .append (self ._split (line ))
258
+ self ._consume_row (line )
259
+
260
+ def _consume_row (self , line : str ):
261
+ match = re .match (self .outer_border_pattern , line )
262
+ if match :
263
+ self ._state += 1
264
+ else :
265
+ self ._rows .append (self ._split (line ))
253
266
254
267
def _split (self , line : str ) -> List [str ]:
255
268
assert self ._column_starts
256
269
fragments = []
257
270
for i , start in enumerate (self ._column_starts ):
258
- end = self ._column_starts [i + 1 ] if i < len (self ._column_starts ) - 1 else None
271
+ end = (
272
+ self ._column_starts [i + 1 ] + self .column_end_offset
273
+ if i < len (self ._column_starts ) - 1 else
274
+ self ._columns_end
275
+ )
259
276
fragment = line [start :end ].strip ()
260
277
self ._max_sizes [i ] = max (self ._max_sizes [i ], len (fragment ))
261
278
fragments .append (fragment )
@@ -281,6 +298,48 @@ def finish_consumption(self, final: bool) -> str:
281
298
return result
282
299
283
300
301
class SimpleTableParser(TableParser):
    """Table parser configured for tables whose border lines are runs of ``=``.

    The class only provides the border settings read by ``TableParser``;
    it defines no methods of its own.
    """

    # Border line: optional indentation, then two or more runs of ``=``
    # separated by spaces.
    outer_border_pattern = r'^(?P<indent>\s*)=+(?P<column> +=+)+$'

    # A column starts where a border character directly follows the prefix
    # character on the top border line.
    column_top_border = '='
    column_top_prefix = ' '

    # Added to the next column's start to obtain the end of a cell slice.
    column_end_offset = 0
306
+
307
+
308
class GridTableParser(TableParser):
    """Table parser configured for tables drawn with ``+``, ``-`` and ``|``.

    While rows are being parsed, lines are expected to alternate between a
    content line (starting with ``|``) and a separator line (starting with
    ``+-``), beginning with a content line.
    """

    # Border line: optional indentation, one or more ``+---`` segments and a
    # closing ``+``.
    outer_border_pattern = r'^(?P<indent>\s*)(?P<column>\+-+)+\+$'

    # A column starts where a ``-`` directly follows a ``+`` on the top border.
    column_top_border = '-'
    column_top_prefix = '+'

    # Added to the next column's start to obtain the end of a cell slice; a
    # recorded start sits one character after its ``+``, so -1 ends the slice
    # at that boundary column.
    column_end_offset = -1

    # True when the next line should hold cell content, False when a
    # separator line is expected.
    _expecting_row_content: bool

    def _reset_state(self):
        """Reset the inherited state and expect a content line next."""
        super()._reset_state()
        self._expecting_row_content = True

    def _is_correct_row(self, line: str) -> bool:
        """Return whether ``line`` begins like the kind of line currently expected.

        Leading whitespace is ignored. A content line must start with ``|``,
        a separator line with ``+-``.
        """
        expected_prefix = '|' if self._expecting_row_content else '+-'
        return line.lstrip().startswith(expected_prefix)

    def can_consume(self, line: str) -> bool:
        """Return whether the parser is able to take ``line``.

        Nothing is accepted once the parser has finished. While rows are
        being parsed the line must be of the expected kind; in any other
        state every line is accepted.
        """
        states = TableParser.State
        if self._state == states.FINISHED:
            return False
        if self._state != states.PARSING_ROWS:
            return True
        return self._is_correct_row(line)

    def _consume_row(self, line: str):
        """Process one line received while the parser is in the row-parsing state.

        An expected content line is split and stored as a row, an expected
        separator line is dropped, and in both cases the expectation flips.
        A line of the wrong kind advances the state by one instead.
        """
        if not self._is_correct_row(line):
            self._state += 1
            return
        if self._expecting_row_content:
            self._rows.append(self._split(line))
        self._expecting_row_content = not self._expecting_row_content
341
+
342
+
284
343
class BlockParser (IParser ):
285
344
enclosure = '```'
286
345
follower : Union ['IParser' , None ] = None
@@ -445,7 +504,8 @@ def initiate_parsing(self, line: str, current_language: str) -> IBlockBeginning:
445
504
MathBlockParser (),
446
505
ExplicitCodeBlockParser (),
447
506
DoubleColonBlockParser (),
448
- TableParser ()
507
+ SimpleTableParser (),
508
+ GridTableParser ()
449
509
]
450
510
451
511
RST_SECTIONS = {
0 commit comments