Skip to content

Commit da29cc3

Browse files
author
Victor Stinner
committed
Issue python#13441: _PyUnicode_CheckConsistency() dumps the string if the maximum
character is bigger than U+10FFFF and locale.localeconv() dumps the string before decoding it. Temporary hack to debug the issue python#13441.
1 parent a996f1e commit da29cc3

2 files changed

Lines changed: 39 additions & 0 deletions

File tree

Modules/_localemodule.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,23 @@ str2uni(const char* s)
7979
return res2;
8080
}
8181

82+
#ifdef Py_DEBUG
83+
/* Debug helper: write the raw bytes of a localeconv() string to stdout.

   name  -- label printed in the header line
   value -- NUL-terminated byte string; it is passed to strlen(), so it
            must not be NULL

   The output has the form
       Decode localeconv() <name>: {0x2c 0xe9} (len=2)
   i.e. one two-digit hex value per byte, separated by single spaces,
   followed by the byte count. */
void
dump_str(const char *name, const char *value)
{
    size_t i, len = strlen(value);

    printf("Decode localeconv() %s: {", name);
    for (i = 0; i < len; i++) {
        /* Go through unsigned char so that bytes >= 0x80 are printed as
           0x80..0xff instead of being sign-extended on promotion. */
        unsigned char ch = (unsigned char)value[i];

        if (i != 0)
            putchar(' ');
        printf("0x%02x", ch);
    }
    /* len is a size_t: passing it for "%u" is undefined behaviour when
       size_t is wider than unsigned int, so convert explicitly to a type
       that has a portable conversion specifier. */
    printf("} (len=%lu)\n", (unsigned long)len);
}
97+
#endif
98+
8299
/* support functions for formatting floating point numbers */
83100

84101
PyDoc_STRVAR(setlocale__doc__,
@@ -184,11 +201,20 @@ PyLocale_localeconv(PyObject* self)
184201
/* hopefully, the localeconv result survives the C library calls
185202
involved herein */
186203

204+
/* RESULT_STRING(s): decode the localeconv() field l->s with str2uni(),
   jump to `failed` if that returns NULL, otherwise pass the object to
   PyDict_SetItemString(result, "s", x) and drop the local reference.

   In Py_DEBUG builds the raw bytes of the field are first printed with
   dump_str() (temporary aid for debugging issue #13441); in other builds
   the trace hook expands to nothing. */
#ifdef Py_DEBUG
#  define RESULT_STRING_TRACE(name, value) dump_str(name, value);
#else
#  define RESULT_STRING_TRACE(name, value)
#endif

#define RESULT_STRING(s) \
    RESULT_STRING_TRACE(#s, l->s) \
    x = str2uni(l->s); \
    if (!x) \
        goto failed; \
    PyDict_SetItemString(result, #s, x); \
    Py_XDECREF(x)
192218

193219
#define RESULT_INT(i)\
194220
x = PyLong_FromLong(l->i);\

Objects/unicodeobject.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,19 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
391391
if (ch > maxchar)
392392
maxchar = ch;
393393
}
394+
if (maxchar > 0x10FFFF) {
395+
printf("Invalid Unicode string! {");
396+
for (i=0; i < ascii->length; i++)
397+
{
398+
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
399+
if (i)
400+
printf(", U+%04x", ch);
401+
else
402+
printf("U+%04x", ch);
403+
}
404+
printf("} (len=%u)\n", ascii->length);
405+
abort();
406+
}
394407
if (kind == PyUnicode_1BYTE_KIND) {
395408
if (ascii->state.ascii == 0) {
396409
assert(maxchar >= 128);

0 commit comments

Comments
 (0)