@@ -7006,7 +7006,7 @@ decode_code_page_flags(UINT code_page)
70067006 */
70077007static int
70087008decode_code_page_strict (UINT code_page ,
7009- PyUnicodeObject * * v ,
7009+ PyObject * * v ,
70107010 const char * in ,
70117011 int insize )
70127012{
@@ -7022,15 +7022,15 @@ decode_code_page_strict(UINT code_page,
70227022
70237023 if (* v == NULL ) {
70247024 /* Create unicode object */
7025- * v = _PyUnicode_New (outsize );
7025+ * v = ( PyObject * ) _PyUnicode_New (outsize );
70267026 if (* v == NULL )
70277027 return -1 ;
70287028 out = PyUnicode_AS_UNICODE (* v );
70297029 }
70307030 else {
70317031 /* Extend unicode object */
70327032 Py_ssize_t n = PyUnicode_GET_SIZE (* v );
7033- if (PyUnicode_Resize (( PyObject * * ) v , n + outsize ) < 0 )
7033+ if (PyUnicode_Resize (v , n + outsize ) < 0 )
70347034 return -1 ;
70357035 out = PyUnicode_AS_UNICODE (* v ) + n ;
70367036 }
@@ -7057,9 +7057,8 @@ decode_code_page_strict(UINT code_page,
70577057 */
70587058static int
70597059decode_code_page_errors (UINT code_page ,
7060- PyUnicodeObject * * v ,
7061- const char * in ,
7062- int size ,
7060+ PyObject * * v ,
7061+ const char * in , const int size ,
70637062 const char * errors )
70647063{
70657064 const char * startin = in ;
@@ -7103,7 +7102,7 @@ decode_code_page_errors(UINT code_page,
71037102 PyErr_NoMemory ();
71047103 goto error ;
71057104 }
7106- * v = _PyUnicode_New (size * Py_ARRAY_LENGTH (buffer ));
7105+ * v = ( PyObject * ) _PyUnicode_New (size * Py_ARRAY_LENGTH (buffer ));
71077106 if (* v == NULL )
71087107 goto error ;
71097108 startout = PyUnicode_AS_UNICODE (* v );
@@ -7115,7 +7114,7 @@ decode_code_page_errors(UINT code_page,
71157114 PyErr_NoMemory ();
71167115 goto error ;
71177116 }
7118- if (PyUnicode_Resize (( PyObject * * ) v , n + size * Py_ARRAY_LENGTH (buffer )) < 0 )
7117+ if (PyUnicode_Resize (v , n + size * Py_ARRAY_LENGTH (buffer )) < 0 )
71197118 goto error ;
71207119 startout = PyUnicode_AS_UNICODE (* v ) + n ;
71217120 }
@@ -7173,9 +7172,9 @@ decode_code_page_errors(UINT code_page,
71737172 /* Extend unicode object */
71747173 outsize = out - startout ;
71757174 assert (outsize <= PyUnicode_WSTR_LENGTH (* v ));
7176- if (PyUnicode_Resize (( PyObject * * ) v , outsize ) < 0 )
7175+ if (PyUnicode_Resize (v , outsize ) < 0 )
71777176 goto error ;
7178- ret = 0 ;
7177+ ret = size ;
71797178
71807179error :
71817180 Py_XDECREF (encoding_obj );
@@ -7184,50 +7183,13 @@ decode_code_page_errors(UINT code_page,
71847183 return ret ;
71857184}
71867185
7187- /*
7188- * Decode a byte string from a Windows code page into unicode object. If
7189- * 'final' is set, converts trailing lead-byte too.
7190- *
7191- * Returns consumed size if succeed, or raise a WindowsError or
7192- * UnicodeDecodeError exception and returns -1 on error.
7193- */
7194- static int
7195- decode_code_page (UINT code_page ,
7196- PyUnicodeObject * * v ,
7197- const char * s , int size ,
7198- int final , const char * errors )
7199- {
7200- int done ;
7201-
7202- /* Skip trailing lead-byte unless 'final' is set */
7203- if (size == 0 ) {
7204- if (* v == NULL ) {
7205- Py_INCREF (unicode_empty );
7206- * v = (PyUnicodeObject * )unicode_empty ;
7207- if (* v == NULL )
7208- return -1 ;
7209- }
7210- return 0 ;
7211- }
7212-
7213- if (!final && is_dbcs_lead_byte (code_page , s , size - 1 ))
7214- -- size ;
7215-
7216- done = decode_code_page_strict (code_page , v , s , size );
7217- if (done == -2 )
7218- done = decode_code_page_errors (code_page , v , s , size , errors );
7219- return done ;
7220- }
7221-
72227186static PyObject *
72237187decode_code_page_stateful (int code_page ,
7224- const char * s ,
7225- Py_ssize_t size ,
7226- const char * errors ,
7227- Py_ssize_t * consumed )
7188+ const char * s , Py_ssize_t size ,
7189+ const char * errors , Py_ssize_t * consumed )
72287190{
7229- PyUnicodeObject * v = NULL ;
7230- int done ;
7191+ PyObject * v = NULL ;
7192+ int chunk_size , final , converted , done ;
72317193
72327194 if (code_page < 0 ) {
72337195 PyErr_SetString (PyExc_ValueError , "invalid code page number" );
@@ -7237,29 +7199,53 @@ decode_code_page_stateful(int code_page,
72377199 if (consumed )
72387200 * consumed = 0 ;
72397201
7202+ do
7203+ {
72407204#ifdef NEED_RETRY
7241- retry :
7242- if (size > INT_MAX )
7243- done = decode_code_page (code_page , & v , s , INT_MAX , 0 , errors );
7244- else
7205+ if (size > INT_MAX ) {
7206+ chunk_size = INT_MAX ;
7207+ final = 0 ;
7208+ done = 0 ;
7209+ }
7210+ else
72457211#endif
7246- done = decode_code_page (code_page , & v , s , (int )size , !consumed , errors );
7212+ {
7213+ chunk_size = (int )size ;
7214+ final = (consumed == NULL );
7215+ done = 1 ;
7216+ }
72477217
7248- if (done < 0 ) {
7249- Py_XDECREF (v );
7250- return NULL ;
7251- }
7218+ /* Skip trailing lead-byte unless 'final' is set */
7219+ if (!final && is_dbcs_lead_byte (code_page , s , chunk_size - 1 ))
7220+ -- chunk_size ;
72527221
7253- if (consumed )
7254- * consumed += done ;
7222+ if (chunk_size == 0 && done ) {
7223+ if (v != NULL )
7224+ break ;
7225+ Py_INCREF (unicode_empty );
7226+ return unicode_empty ;
7227+ }
72557228
7256- #ifdef NEED_RETRY
7257- if (size > INT_MAX ) {
7258- s += done ;
7259- size -= done ;
7260- goto retry ;
7261- }
7262- #endif
7229+
7230+ converted = decode_code_page_strict (code_page , & v ,
7231+ s , chunk_size );
7232+ if (converted == -2 )
7233+ converted = decode_code_page_errors (code_page , & v ,
7234+ s , chunk_size ,
7235+ errors );
7236+ assert (converted != 0 );
7237+
7238+ if (converted < 0 ) {
7239+ Py_XDECREF (v );
7240+ return NULL ;
7241+ }
7242+
7243+ if (consumed )
7244+ * consumed += converted ;
7245+
7246+ s += converted ;
7247+ size -= converted ;
7248+ } while (!done );
72637249
72647250#ifndef DONT_MAKE_RESULT_READY
72657251 if (_PyUnicode_READY_REPLACE (& v )) {
@@ -7268,7 +7254,7 @@ decode_code_page_stateful(int code_page,
72687254 }
72697255#endif
72707256 assert (_PyUnicode_CheckConsistency (v , 1 ));
7271- return ( PyObject * ) v ;
7257+ return v ;
72727258}
72737259
72747260PyObject *
@@ -7583,40 +7569,6 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
75837569 return ret ;
75847570}
75857571
7586- /*
7587- * Encode a Unicode string to a Windows code page into a byte string.
7588- *
7589- * Returns consumed characters if succeed, or raise a WindowsError and returns
7590- * -1 on other error.
7591- */
7592- static int
7593- encode_code_page_chunk (UINT code_page , PyObject * * outbytes ,
7594- PyObject * unicode , Py_ssize_t unicode_offset ,
7595- const Py_UNICODE * p , int size ,
7596- const char * errors )
7597- {
7598- int done ;
7599-
7600- if (size == 0 ) {
7601- if (* outbytes == NULL ) {
7602- * outbytes = PyBytes_FromStringAndSize (NULL , 0 );
7603- if (* outbytes == NULL )
7604- return -1 ;
7605- }
7606- return 0 ;
7607- }
7608-
7609- done = encode_code_page_strict (code_page , outbytes ,
7610- p , size ,
7611- errors );
7612- if (done == -2 )
7613- done = encode_code_page_errors (code_page , outbytes ,
7614- unicode , unicode_offset ,
7615- p , size ,
7616- errors );
7617- return done ;
7618- }
7619-
76207572static PyObject *
76217573encode_code_page (int code_page ,
76227574 PyObject * unicode ,
@@ -7626,7 +7578,7 @@ encode_code_page(int code_page,
76267578 Py_ssize_t size ;
76277579 PyObject * outbytes = NULL ;
76287580 Py_ssize_t offset ;
7629- int chunk_len , ret ;
7581+ int chunk_len , ret , done ;
76307582
76317583 p = PyUnicode_AsUnicodeAndSize (unicode , & size );
76327584 if (p == NULL )
@@ -7637,20 +7589,32 @@ encode_code_page(int code_page,
76377589 return NULL ;
76387590 }
76397591
7592+ if (size == 0 )
7593+ return PyBytes_FromStringAndSize (NULL , 0 );
7594+
76407595 offset = 0 ;
76417596 do
76427597 {
76437598#ifdef NEED_RETRY
7644- if (size > INT_MAX )
7599+ if (size > INT_MAX ) {
76457600 chunk_len = INT_MAX ;
7601+ done = 0 ;
7602+ }
76467603 else
76477604#endif
7605+ {
76487606 chunk_len = (int )size ;
7649- ret = encode_code_page_chunk (code_page , & outbytes ,
7650- unicode , offset ,
7651- p , chunk_len ,
7652- errors );
7607+ done = 1 ;
7608+ }
76537609
7610+ ret = encode_code_page_strict (code_page , & outbytes ,
7611+ p , chunk_len ,
7612+ errors );
7613+ if (ret == -2 )
7614+ ret = encode_code_page_errors (code_page , & outbytes ,
7615+ unicode , offset ,
7616+ p , chunk_len ,
7617+ errors );
76547618 if (ret < 0 ) {
76557619 Py_XDECREF (outbytes );
76567620 return NULL ;
@@ -7659,7 +7623,7 @@ encode_code_page(int code_page,
76597623 p += chunk_len ;
76607624 offset += chunk_len ;
76617625 size -= chunk_len ;
7662- } while (size != 0 );
7626+ } while (! done );
76637627
76647628 return outbytes ;
76657629}
0 commit comments