@@ -59,7 +59,7 @@ def tokenize(code: str) -> Generator[Token, Any, None]:
5959 yield Token (kind , value , line_num , column )
6060
6161
62- def cpp_tokenize (code : str ) -> List [ str ]:
62+ def cpp_tokenize (code : str ) -> Generator [ list [ Token ], Any , None ]:
6363 """
6464 The following regex accepts this kind of comment
6565
@@ -89,6 +89,36 @@ def transform(line: str):
8989 yield list (transform (value ))
9090
9191
def txt_tokenize(code: str) -> Generator[list[Token], Any, None]:
    """
    Find ``--#`` ... ``#--`` text blocks in *code* and yield one list of
    ``COMMENT`` tokens per block.

    The regex accepts text blocks of this shape:

        --#
        * test
        * test
        * test
        * @brief test
        #--

    Only the opening ``--#`` may be preceded by spaces. Each body line must
    begin with ``*`` followed by a space, the block must contain at least one
    body line, and the closing ``#--`` must be followed by a newline.

    For every line after the opening ``--#``, surrounding whitespace is
    stripped and the first remaining character is replaced by ``#``
    (``* test`` becomes ``# test``). The closing ``#--`` line and the empty
    piece after the final newline go through the same rewrite, so each block
    also ends with the tokens ``#--`` and ``#``.

    :param code: full source text to scan.
    :return: generator yielding, for each matched block, the list of
        ``COMMENT`` tokens built from its lines.
    """
    tok_regex = r"^ *--#\n(\*( .*)\n)+#--\n"
    # Running counter shared by all blocks: it starts at 1 and is bumped once
    # per emitted token.
    # NOTE(review): it does not account for text outside the matched blocks,
    # so it is presumably not the real source line - confirm with callers.
    line_num = 1
    # NOTE(review): never updated, so ``column`` below is the absolute offset
    # of the match start rather than a column within its line - confirm.
    line_start = 0

    def transform(block: str, column: int) -> Generator[Token, Any, None]:
        """Yield one COMMENT token per line of *block* after its first line."""
        nonlocal line_num
        # Split on a bare newline: accepted body lines start with "*" right
        # after the newline, so a "newline + space" delimiter never occurs
        # inside a match and would leave the block unsplit (no tokens).
        for raw in block.split("\n")[1:]:
            line_num += 1
            yield Token("COMMENT", "#" + raw.strip()[1:], line_num, column)

    for mo in re.finditer(tok_regex, code, flags=re.MULTILINE):
        # The column is passed explicitly so the helper does not depend on a
        # variable that the enclosing loop assigns later.
        yield list(transform(mo.group(), mo.start() - line_start))
120+
121+
92122def tree_from_tokens (tokens : List [Token ]) -> List :
93123 if len (tokens ) == 0 :
94124 raise SyntaxError ("unexpected EOF" )
0 commit comments