Skip to content

Commit f2d530f

Browse files
author
Rémi Lapeyre
committed
Override field_size_limit per csv.reader object
1 parent ed1deb0 commit f2d530f

4 files changed

Lines changed: 134 additions & 18 deletions

File tree

Doc/library/csv.rst

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ The :mod:`csv` module defines the following functions:
5353
.. index::
5454
single: universal newlines; csv.reader function
5555

56-
.. function:: reader(csvfile, dialect='excel', **fmtparams)
56+
.. function:: reader(csvfile, dialect='excel', field_size_limit=None, **fmtparams)
5757

5858
Return a reader object which will iterate over lines in the given *csvfile*.
5959
*csvfile* can be any object which supports the :term:`iterator` protocol and returns a
@@ -417,6 +417,10 @@ Reader objects have the following public attributes:
417417
The number of lines read from the source iterator. This is not the same as the
418418
number of records returned, as records can span multiple lines.
419419

420+
.. attribute:: csvreader.field_size_limit
421+
422+
The maximum field size allowed by this reader. If :const:`None`,
423+
the module-wide limit returned by :func:`csv.field_size_limit` is used instead.
420424

421425
DictReader objects have the following public attribute:
422426

Lib/test/test_csv.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,36 @@ def test_read_bigfield(self):
319319
finally:
320320
csv.field_size_limit(limit)
321321

322+
def test_override_size_limit(self):
323+
line = ',,,'
324+
reader = csv.reader([line])
325+
self.assertEqual(reader.field_size_limit, None)
326+
reader = csv.reader([line], field_size_limit=None)
327+
self.assertEqual(reader.field_size_limit, None)
328+
329+
reader = csv.reader([line], field_size_limit=5)
330+
self.assertEqual(reader.field_size_limit, 5)
331+
reader.field_size_limit = None
332+
self.assertEqual(reader.field_size_limit, None)
333+
reader.field_size_limit = 6
334+
self.assertEqual(reader.field_size_limit, 6)
335+
del reader.field_size_limit
336+
self.assertEqual(reader.field_size_limit, None)
337+
338+
with self.assertRaises(ValueError):
339+
csv.reader([line], field_size_limit=-1)
340+
341+
with self.assertRaises(TypeError):
342+
csv.reader([line], field_size_limit="string")
343+
344+
line = 'long_field,3,4,5'
345+
reader = csv.reader([line, line], field_size_limit=4)
346+
with self.assertRaises(csv.Error):
347+
list(reader)
348+
reader.field_size_limit = 50
349+
self.assertEqual(list(reader), [['long_field', '3', '4', '5']])
350+
351+
322352
def test_read_linenum(self):
323353
r = csv.reader(['line,1', 'line,2', 'line,3'])
324354
self.assertEqual(r.line_num, 0)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
`csv.field_size_limit` can now be overridden for each csv.reader
2+
instance. Contributed by Rémi Lapeyre.

Modules/_csv.c

Lines changed: 97 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ typedef struct {
102102
Py_ssize_t field_len; /* length of current field */
103103
int numeric_field; /* treat field as numeric */
104104
unsigned long line_num; /* Source-file line number */
105+
long field_limit; /* field limit for current reader */
105106
} ReaderObj;
106107

107108
static PyTypeObject Reader_Type;
@@ -572,13 +573,15 @@ parse_grow_buff(ReaderObj *self)
572573
static int
573574
parse_add_char(ReaderObj *self, Py_UCS4 c)
574575
{
575-
if (self->field_len >= _csvstate_global->field_limit) {
576+
long limit = self->field_limit == -1 ? _csvstate_global->field_limit : self->field_limit;
577+
if (self->field_len >= limit) {
576578
PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
577-
_csvstate_global->field_limit);
579+
limit);
578580
return -1;
579581
}
580-
if (self->field_len == self->field_size && !parse_grow_buff(self))
582+
if (self->field_len == self->field_size && !parse_grow_buff(self)) {
581583
return -1;
584+
}
582585
self->field[self->field_len++] = c;
583586
return 0;
584587
}
@@ -894,6 +897,45 @@ static struct PyMemberDef Reader_memberlist[] = {
894897
{ NULL }
895898
};
896899

900+
static PyObject *
901+
Reader_get_field_size_limit(PyObject *self, void *Py_UNUSED(ignored))
902+
{
903+
ReaderObj *reader = (ReaderObj *)self;
904+
if (reader->field_limit == -1) { // -1 is used as a flag for unset value
905+
Py_RETURN_NONE;
906+
}
907+
else {
908+
return PyLong_FromLong(reader->field_limit);
909+
}
910+
}
911+
912+
static int
913+
Reader_set_field_size_limit(PyObject *self, PyObject *arg, void *Py_UNUSED(ignored))
914+
{
915+
ReaderObj *reader = (ReaderObj *)self;
916+
if (arg == NULL || arg == Py_None) {
917+
reader->field_limit = -1;
918+
return 0;
919+
}
920+
else {
921+
long limit = PyLong_AsLong(arg);
922+
if (limit == -1 && PyErr_Occurred()) {
923+
return -1;
924+
}
925+
if (limit <= 0) {
926+
PyErr_Format(PyExc_ValueError, "field_size_limit must greater than 0");
927+
return -1;
928+
}
929+
reader->field_limit = limit;
930+
return 0;
931+
}
932+
}
933+
934+
static PyGetSetDef Reader_getset[] = {
935+
{ "field_size_limit", Reader_get_field_size_limit,
936+
Reader_set_field_size_limit, PyDoc_STR("field size limit") },
937+
{ NULL },
938+
};
897939

898940
static PyTypeObject Reader_Type = {
899941
PyVarObject_HEAD_INIT(NULL, 0)
@@ -927,18 +969,19 @@ static PyTypeObject Reader_Type = {
927969
(getiterfunc)Reader_iternext, /*tp_iternext*/
928970
Reader_methods, /*tp_methods*/
929971
Reader_memberlist, /*tp_members*/
930-
0, /*tp_getset*/
931-
972+
Reader_getset, /*tp_getset*/
932973
};
933974

934975
static PyObject *
935976
csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
936977
{
937978
PyObject * iterator, * dialect = NULL;
979+
PyObject * _field_size_limit = NULL;
938980
ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
939981

940-
if (!self)
982+
if (!self) {
941983
return NULL;
984+
}
942985

943986
self->dialect = NULL;
944987
self->fields = NULL;
@@ -947,30 +990,67 @@ csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
947990
self->field_size = 0;
948991
self->line_num = 0;
949992

993+
_field_size_limit = PyUnicode_FromString("field_size_limit");
994+
if (_field_size_limit == NULL) {
995+
goto fail;
996+
}
997+
PyObject *field_size_limit = NULL;
998+
if (keyword_args != NULL) {
999+
field_size_limit = PyDict_GetItemWithError(keyword_args, _field_size_limit);
1000+
if (PyErr_Occurred()) {
1001+
goto fail;
1002+
}
1003+
}
1004+
if (field_size_limit == NULL) {
1005+
self->field_limit = -1;
1006+
} else if (field_size_limit == Py_None) {
1007+
self->field_limit = -1;
1008+
if (PyDict_DelItem(keyword_args, _field_size_limit) < 0) {
1009+
goto fail;
1010+
}
1011+
}
1012+
else {
1013+
long limit = PyLong_AsLong(field_size_limit);
1014+
if (PyErr_Occurred()) {
1015+
goto fail;
1016+
}
1017+
if (limit <= 0) {
1018+
PyErr_Format(PyExc_ValueError, "field_size_limit must greater than 0");
1019+
goto fail;
1020+
}
1021+
if (PyDict_DelItem(keyword_args, _field_size_limit) < 0) {
1022+
goto fail;
1023+
}
1024+
self->field_limit = limit;
1025+
}
1026+
Py_DECREF(_field_size_limit);
1027+
_field_size_limit = NULL;
1028+
9501029
if (parse_reset(self) < 0) {
951-
Py_DECREF(self);
952-
return NULL;
1030+
goto fail;
9531031
}
9541032

9551033
if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
956-
Py_DECREF(self);
957-
return NULL;
1034+
goto fail;
9581035
}
9591036
self->input_iter = PyObject_GetIter(iterator);
9601037
if (self->input_iter == NULL) {
9611038
PyErr_SetString(PyExc_TypeError,
9621039
"argument 1 must be an iterator");
963-
Py_DECREF(self);
964-
return NULL;
1040+
goto fail;
9651041
}
9661042
self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
9671043
if (self->dialect == NULL) {
968-
Py_DECREF(self);
969-
return NULL;
1044+
goto fail;
9701045
}
9711046

9721047
PyObject_GC_Track(self);
9731048
return (PyObject *)self;
1049+
1050+
fail:
1051+
Py_XDECREF(_field_size_limit);
1052+
Py_DECREF(self);
1053+
return NULL;
9741054
}
9751055

9761056
/*
@@ -1535,8 +1615,8 @@ PyDoc_STRVAR(csv_module_doc,
15351615
" written as two quotes\n");
15361616

15371617
PyDoc_STRVAR(csv_reader_doc,
1538-
" csv_reader = reader(iterable [, dialect='excel']\n"
1539-
" [optional keyword args])\n"
1618+
" csv_reader = reader(iterable , dialect='excel',\n"
1619+
" field_size_limit=None, **fmtparams)\n"
15401620
" for row in csv_reader:\n"
15411621
" process(row)\n"
15421622
"\n"
@@ -1666,7 +1746,7 @@ PyInit__csv(void)
16661746
return NULL;
16671747

16681748
/* Add the CSV exception object to the module. */
1669-
_csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1749+
_csvstate(module)->error_obj = PyErr_NewException("csv.Error", NULL, NULL);
16701750
if (_csvstate(module)->error_obj == NULL)
16711751
return NULL;
16721752
Py_INCREF(_csvstate(module)->error_obj);

0 commit comments

Comments
 (0)