@@ -265,6 +265,16 @@ raise_decode_error(Parser *p)
265265 return -1 ;
266266}
267267
268+ static inline void
269+ raise_unclosed_parentheses_error (Parser * p ) {
270+ int error_lineno = p -> tok -> parenlinenostack [p -> tok -> level - 1 ];
271+ int error_col = p -> tok -> parencolstack [p -> tok -> level - 1 ];
272+ RAISE_ERROR_KNOWN_LOCATION (p , PyExc_SyntaxError ,
273+ error_lineno , error_col ,
274+ "'%c' was never closed" ,
275+ p -> tok -> parenstack [p -> tok -> level - 1 ]);
276+ }
277+
268278static void
269279raise_tokenizer_init_error (PyObject * filename )
270280{
@@ -324,7 +334,11 @@ tokenizer_error(Parser *p)
324334 RAISE_SYNTAX_ERROR ("EOL while scanning string literal" );
325335 return -1 ;
326336 case E_EOF :
327- RAISE_SYNTAX_ERROR ("unexpected EOF while parsing" );
337+ if (p -> tok -> level ) {
338+ raise_unclosed_parentheses_error (p );
339+ } else {
340+ RAISE_SYNTAX_ERROR ("unexpected EOF while parsing" );
341+ }
328342 return -1 ;
329343 case E_DEDENT :
330344 RAISE_INDENTATION_ERROR ("unindent does not match any outer indentation level" );
@@ -1151,6 +1165,52 @@ reset_parser_state(Parser *p)
11511165 p -> call_invalid_rules = 1 ;
11521166}
11531167
1168+ static int
1169+ _PyPegen_check_tokenizer_errors (Parser * p ) {
1170+ // Tokenize the whole input to see if there are any tokenization
1171+ // errors such as mistmatching parentheses. These will get priority
1172+ // over generic syntax errors only if the line number of the error is
1173+ // before the one that we had for the generic error.
1174+
1175+ // We don't want to tokenize to the end for interactive input
1176+ if (p -> tok -> prompt != NULL ) {
1177+ return 0 ;
1178+ }
1179+
1180+
1181+ Token * current_token = p -> known_err_token != NULL ? p -> known_err_token : p -> tokens [p -> fill - 1 ];
1182+ Py_ssize_t current_err_line = current_token -> lineno ;
1183+
1184+ // Save the tokenizer state to restore them later in case we found nothing
1185+ struct tok_state saved_tok ;
1186+ memcpy (& saved_tok , p -> tok , sizeof (struct tok_state ));
1187+
1188+ for (;;) {
1189+ const char * start ;
1190+ const char * end ;
1191+ switch (PyTokenizer_Get (p -> tok , & start , & end )) {
1192+ case ERRORTOKEN :
1193+ if (p -> tok -> level != 0 ) {
1194+ int error_lineno = p -> tok -> parenlinenostack [p -> tok -> level - 1 ];
1195+ if (current_err_line > error_lineno ) {
1196+ raise_unclosed_parentheses_error (p );
1197+ return -1 ;
1198+ }
1199+ }
1200+ break ;
1201+ case ENDMARKER :
1202+ break ;
1203+ default :
1204+ continue ;
1205+ }
1206+ break ;
1207+ }
1208+
1209+ // Restore the tokenizer state
1210+ memcpy (p -> tok , & saved_tok , sizeof (struct tok_state ));
1211+ return 0 ;
1212+ }
1213+
11541214void *
11551215_PyPegen_run_parser (Parser * p )
11561216{
@@ -1164,8 +1224,12 @@ _PyPegen_run_parser(Parser *p)
11641224 if (p -> fill == 0 ) {
11651225 RAISE_SYNTAX_ERROR ("error at start before reading any input" );
11661226 }
1167- else if (p -> tok -> done == E_EOF ) {
1168- RAISE_SYNTAX_ERROR ("unexpected EOF while parsing" );
1227+ else if (p -> tok -> done == E_EOF ) {
1228+ if (p -> tok -> level ) {
1229+ raise_unclosed_parentheses_error (p );
1230+ } else {
1231+ RAISE_SYNTAX_ERROR ("unexpected EOF while parsing" );
1232+ }
11691233 }
11701234 else {
11711235 if (p -> tokens [p -> fill - 1 ]-> type == INDENT ) {
@@ -1175,6 +1239,9 @@ _PyPegen_run_parser(Parser *p)
11751239 RAISE_INDENTATION_ERROR ("unexpected unindent" );
11761240 }
11771241 else {
1242+ if (_PyPegen_check_tokenizer_errors (p )) {
1243+ return NULL ;
1244+ }
11781245 RAISE_SYNTAX_ERROR ("invalid syntax" );
11791246 }
11801247 }
0 commit comments