From 19282b47d16b6fc50b3457623e1266a04634f4d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zolt=C3=A1n=20V=C3=B6r=C3=B6s?= Date: Sat, 29 Jan 2022 22:30:11 +0100 Subject: [PATCH 1/7] add dtype keyword to loadtxt --- code/numpy/io/io.c | 23 +++++++++++----- docs/manual/source/conf.py | 2 +- docs/manual/source/numpy-functions.rst | 24 +++++++++++++---- docs/numpy-functions.ipynb | 36 +++++++++++++++++--------- docs/ulab-change-log.md | 6 +++++ docs/ulab-convert.ipynb | 18 ++++++------- tests/2d/numpy/loadtxt.py | 13 ++++++++-- tests/2d/numpy/loadtxt.py.exp | 17 ++++++++++++ 8 files changed, 104 insertions(+), 35 deletions(-) diff --git a/code/numpy/io/io.c b/code/numpy/io/io.c index 9bdd9ee2..35d47679 100644 --- a/code/numpy/io/io.c +++ b/code/numpy/io/io.c @@ -8,6 +8,7 @@ * Copyright (c) 2022 Zoltán Vörös */ +#include #include #include "py/builtin.h" @@ -243,6 +244,7 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw { MP_QSTR_comments, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = mp_const_none } }, { MP_QSTR_max_rows, MP_ARG_KW_ONLY | MP_ARG_INT, { .u_int = -1 } }, { MP_QSTR_usecols, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = mp_const_none } }, + { MP_QSTR_dtype, MP_ARG_KW_ONLY | MP_ARG_INT, { .u_int = NDARRAY_FLOAT } }, }; mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)]; @@ -304,6 +306,8 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw } } + uint8_t dtype = args[5].u_int; + // count the columns and rows // we actually count only the rows and the items, and assume that // the number of columns can be gotten by means of a simple division, @@ -363,7 +367,7 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw #if ULAB_MAX_DIMS == 1 shape[0] = rows; - ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(1, shape, NDARRAY_FLOAT); + ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(1, shape, dtype); #else if(args[4].u_obj == mp_const_none) { shape[ULAB_MAX_DIMS - 1] = columns; @@ 
-371,11 +375,9 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw shape[ULAB_MAX_DIMS - 1] = used_columns; } shape[ULAB_MAX_DIMS - 2] = rows; - ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(2, shape, NDARRAY_FLOAT); + ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(2, shape, dtype); #endif - mp_float_t *array = (mp_float_t *)ndarray->array; - struct mp_stream_seek_t seek_s; seek_s.offset = 0; seek_s.whence = MP_SEEK_SET; @@ -388,6 +390,7 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw rows = 0; columns = 0; + size_t idx = 0; do { read = stream_p->read(stream, buffer, ULAB_IO_BUFFER_SIZE - 1, &error); buffer[read] = '\0'; @@ -421,12 +424,20 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw #else if(args[4].u_obj == mp_const_none) { mp_obj_t value = mp_parse_num_decimal(clipboard, len, false, false, NULL); - *array++ = mp_obj_get_float(value); + if(dtype != NDARRAY_FLOAT) { + mp_float_t _value = mp_obj_get_float(value); + value = mp_obj_new_int((int32_t)MICROPY_FLOAT_C_FUN(round)(_value)); + } + ndarray_set_value(dtype, ndarray->array, idx++, value); } else { for(uint8_t c = 0; c < used_columns; c++) { if(columns == cols[c]) { mp_obj_t value = mp_parse_num_decimal(clipboard, len, false, false, NULL); - *array++ = mp_obj_get_float(value); + if(dtype != NDARRAY_FLOAT) { + mp_float_t _value = mp_obj_get_float(value); + value = mp_obj_new_int((int32_t)MICROPY_FLOAT_C_FUN(round)(_value)); + } + ndarray_set_value(dtype, ndarray->array, idx++, value); break; } } diff --git a/docs/manual/source/conf.py b/docs/manual/source/conf.py index 46f44ea3..80344b5d 100644 --- a/docs/manual/source/conf.py +++ b/docs/manual/source/conf.py @@ -27,7 +27,7 @@ author = 'Zoltán Vörös' # The full version, including alpha/beta/rc tags -release = '4.4.0' +release = '4.4.1' # -- General configuration --------------------------------------------------- diff --git 
a/docs/manual/source/numpy-functions.rst b/docs/manual/source/numpy-functions.rst index 4bd4ffbe..01660cc0 100644 --- a/docs/manual/source/numpy-functions.rst +++ b/docs/manual/source/numpy-functions.rst @@ -1027,10 +1027,12 @@ https://docs.scipy.org/doc/numpy/reference/generated/numpy.loadtxt.html The function reads data from a text file, and returns the generated array. It takes a file name as the single positional argument, and the -``comments`` (with a default value of ``#``), the ``delimiter`` (with a -default value of ``,``), ``usecols`` (with a default of all columns), -and ``max_rows`` (with a default of all rows) keyword arguments. The -array returned is always of type ``float``. +``dtype`` (with a default value of ``float``), the ``comments`` (with a +default value of ``#``), the ``delimiter`` (with a default value of +``,``), ``usecols`` (with a default of all columns), and the +``max_rows`` (with a default of all rows) keyword arguments. If +``dtype`` is supplied and is not ``float``, the data entries will be +converted to the appropriate integer type by rounding the values. .. code:: @@ -1040,8 +1042,12 @@ array returned is always of type ``float``. print('read all data') print(np.loadtxt('loadtxt.dat')) + print('\nread maximum 5 rows (first row is a comment line)') print(np.loadtxt('loadtxt.dat', max_rows=5)) + + print('\nread maximum 5 rows, convert dtype') + print(np.loadtxt('loadtxt.dat', max_rows=5, dtype=np.uint8)) .. parsed-literal:: @@ -1052,7 +1058,9 @@ array returned is always of type ``float``. [12.0, 13.0, 14.0, 15.0], [16.0, 17.0, 18.0, 19.0], [20.0, 21.0, 22.0, 23.0], - [24.0, 25.0, 26.0, 27.0]], dtype=float64) + [24.0, 25.0, 26.0, 27.0], + [28.00000000000001, 29.0, 30.0, 31.0], + [32.0, 33.0, 34.00000000000001, 35.0]], dtype=float64) read maximum 5 rows (first row is a comment line) array([[0.0, 1.0, 2.0, 3.0], @@ -1060,6 +1068,12 @@ array returned is always of type ``float``. 
[8.0, 9.0, 10.0, 11.0], [12.0, 13.0, 14.0, 15.0]], dtype=float64) + read maximum 5 rows, convert dtype + array([[0, 1, 2, 3], + [4, 5, 6, 7], + [8, 9, 10, 11], + [12, 13, 14, 15]], dtype=uint8) + diff --git a/docs/numpy-functions.ipynb b/docs/numpy-functions.ipynb index ea4999c0..f7383c84 100644 --- a/docs/numpy-functions.ipynb +++ b/docs/numpy-functions.ipynb @@ -31,11 +31,11 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": { "ExecuteTime": { - "end_time": "2022-01-28T18:34:31.017702Z", - "start_time": "2022-01-28T18:34:31.010354Z" + "end_time": "2022-01-29T21:24:54.931042Z", + "start_time": "2022-01-29T21:24:54.927243Z" } }, "outputs": [], @@ -49,11 +49,11 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": { "ExecuteTime": { - "end_time": "2022-01-28T18:34:31.565147Z", - "start_time": "2022-01-28T18:34:31.550395Z" + "end_time": "2022-01-29T21:24:55.649634Z", + "start_time": "2022-01-29T21:24:55.626921Z" } }, "outputs": [], @@ -1474,16 +1474,16 @@ "\n", "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.loadtxt.html\n", "\n", - "The function reads data from a text file, and returns the generated array. It takes a file name as the single positional argument, and the `comments` (with a default value of `#`), the `delimiter` (with a default value of `,`), `usecols` (with a default of all columns), and `max_rows` (with a default of all rows) keyword arguments. The array returned is always of type `float`." + "The function reads data from a text file, and returns the generated array. It takes a file name as the single positional argument, and the `dtype` (with a default value of `float`), the `comments` (with a default value of `#`), the `delimiter` (with a default value of `,`), `usecols` (with a default of all columns), and the `max_rows` (with a default of all rows) keyword arguments. 
If `dtype` is supplied and is not `float`, the data entries will be converted to the appropriate integer type by rounding the values." ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 6, "metadata": { "ExecuteTime": { - "end_time": "2022-01-28T18:47:52.346814Z", - "start_time": "2022-01-28T18:47:52.291552Z" + "end_time": "2022-01-29T21:26:36.258135Z", + "start_time": "2022-01-29T21:26:36.236256Z" } }, "outputs": [ @@ -1498,7 +1498,9 @@ " [12.0, 13.0, 14.0, 15.0],\n", " [16.0, 17.0, 18.0, 19.0],\n", " [20.0, 21.0, 22.0, 23.0],\n", - " [24.0, 25.0, 26.0, 27.0]], dtype=float64)\n", + " [24.0, 25.0, 26.0, 27.0],\n", + " [28.00000000000001, 29.0, 30.0, 31.0],\n", + " [32.0, 33.0, 34.00000000000001, 35.0]], dtype=float64)\n", "\n", "read maximum 5 rows (first row is a comment line)\n", "array([[0.0, 1.0, 2.0, 3.0],\n", @@ -1506,6 +1508,12 @@ " [8.0, 9.0, 10.0, 11.0],\n", " [12.0, 13.0, 14.0, 15.0]], dtype=float64)\n", "\n", + "read maximum 5 rows, convert dtype\n", + "array([[0, 1, 2, 3],\n", + " [4, 5, 6, 7],\n", + " [8, 9, 10, 11],\n", + " [12, 13, 14, 15]], dtype=uint8)\n", + "\n", "\n" ] } @@ -1517,8 +1525,12 @@ "\n", "print('read all data')\n", "print(np.loadtxt('loadtxt.dat'))\n", + "\n", "print('\\nread maximum 5 rows (first row is a comment line)')\n", - "print(np.loadtxt('loadtxt.dat', max_rows=5))" + "print(np.loadtxt('loadtxt.dat', max_rows=5))\n", + "\n", + "print('\\nread maximum 5 rows, convert dtype')\n", + "print(np.loadtxt('loadtxt.dat', max_rows=5, dtype=np.uint8))" ] }, { diff --git a/docs/ulab-change-log.md b/docs/ulab-change-log.md index 41b1fbce..981acb95 100644 --- a/docs/ulab-change-log.md +++ b/docs/ulab-change-log.md @@ -1,3 +1,9 @@ +Sat, 29 Jan 2022 + +version 4.4.1 + + add dtype keyword to loadtxt + Tue, 15 Jan 2022 version 4.3.2 diff --git a/docs/ulab-convert.ipynb b/docs/ulab-convert.ipynb index 80515e3d..1c6bcb27 100644 --- a/docs/ulab-convert.ipynb +++ b/docs/ulab-convert.ipynb @@ -17,8 +17,8 @@ "execution_count": 
1, "metadata": { "ExecuteTime": { - "end_time": "2022-01-28T18:33:51.163571Z", - "start_time": "2022-01-28T18:33:51.156339Z" + "end_time": "2022-01-29T21:27:54.988801Z", + "start_time": "2022-01-29T21:27:54.980856Z" } }, "outputs": [ @@ -61,7 +61,7 @@ "author = 'Zoltán Vörös'\n", "\n", "# The full version, including alpha/beta/rc tags\n", - "release = '4.4.0'\n", + "release = '4.4.1'\n", "\n", "\n", "# -- General configuration ---------------------------------------------------\n", @@ -215,11 +215,11 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": { "ExecuteTime": { - "end_time": "2022-01-28T18:56:20.180430Z", - "start_time": "2022-01-28T18:56:19.953451Z" + "end_time": "2022-01-29T21:27:59.573556Z", + "start_time": "2022-01-29T21:27:57.323819Z" } }, "outputs": [], @@ -256,11 +256,11 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": { "ExecuteTime": { - "end_time": "2022-01-28T18:56:26.741592Z", - "start_time": "2022-01-28T18:56:21.395976Z" + "end_time": "2022-01-29T21:28:16.742315Z", + "start_time": "2022-01-29T21:28:11.284954Z" } }, "outputs": [], diff --git a/tests/2d/numpy/loadtxt.py b/tests/2d/numpy/loadtxt.py index 1e2d8534..26ddfe20 100644 --- a/tests/2d/numpy/loadtxt.py +++ b/tests/2d/numpy/loadtxt.py @@ -3,22 +3,31 @@ except: import numpy as np +dtypes = (np.uint8, np.int8, np.uint16, np.int16) + a = np.array(range(8)).reshape((2, 4)) np.savetxt('loadtxt.dat', a, header='test file data') print(np.loadtxt('loadtxt.dat')) +print() + +for dtype in dtypes: + print(np.loadtxt('loadtxt.dat', dtype=dtype)) + print() np.savetxt('loadtxt.dat', a, delimiter=',', header='test file data') print(np.loadtxt('loadtxt.dat', delimiter=',')) - +print() np.savetxt('loadtxt.dat', a, delimiter=',', comments='!', header='test file data') print(np.loadtxt('loadtxt.dat', delimiter=',', comments='!')) +print() print(np.loadtxt('loadtxt.dat', delimiter=',', comments='!', usecols=1)) +print() 
print(np.loadtxt('loadtxt.dat', delimiter=',', comments='!', usecols=(0, 1))) - +print() a = np.array(range(36)).reshape((9, 4)) np.savetxt('loadtxt.dat', a, header='9 data rows and a comment') diff --git a/tests/2d/numpy/loadtxt.py.exp b/tests/2d/numpy/loadtxt.py.exp index c09f739d..373312f4 100644 --- a/tests/2d/numpy/loadtxt.py.exp +++ b/tests/2d/numpy/loadtxt.py.exp @@ -1,13 +1,30 @@ array([[0.0, 1.0, 2.0, 3.0], [4.0, 5.0, 6.0, 7.0]], dtype=float64) + +array([[0, 1, 2, 3], + [4, 5, 6, 7]], dtype=uint8) + +array([[0, 1, 2, 3], + [4, 5, 6, 7]], dtype=int8) + +array([[0, 1, 2, 3], + [4, 5, 6, 7]], dtype=uint16) + +array([[0, 1, 2, 3], + [4, 5, 6, 7]], dtype=int16) + array([[0.0, 1.0, 2.0, 3.0], [4.0, 5.0, 6.0, 7.0]], dtype=float64) + array([[0.0, 1.0, 2.0, 3.0], [4.0, 5.0, 6.0, 7.0]], dtype=float64) + array([[1.0], [5.0]], dtype=float64) + array([[0.0, 1.0], [4.0, 5.0]], dtype=float64) + array([[0.0, 1.0, 2.0, 3.0], [4.0, 5.0, 6.0, 7.0], [8.0, 9.0, 10.0, 11.0], From e980564d64e287127f9e0d972582f9648eea2a0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zolt=C3=A1n=20V=C3=B6r=C3=B6s?= Date: Sat, 29 Jan 2022 22:41:03 +0100 Subject: [PATCH 2/7] assign array elements via function --- code/numpy/io/io.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/code/numpy/io/io.c b/code/numpy/io/io.c index 35d47679..e2046c06 100644 --- a/code/numpy/io/io.c +++ b/code/numpy/io/io.c @@ -237,6 +237,15 @@ MP_DEFINE_CONST_FUN_OBJ_1(io_load_obj, io_load); #endif /* ULAB_NUMPY_HAS_LOAD */ #if ULAB_NUMPY_HAS_LOADTXT +static void io_assign_value(const char *clipboard, uint8_t len, ndarray_obj_t *ndarray, size_t *idx, uint8_t dtype) { + mp_obj_t value = mp_parse_num_decimal(clipboard, len, false, false, NULL); + if(dtype != NDARRAY_FLOAT) { + mp_float_t _value = mp_obj_get_float(value); + value = mp_obj_new_int((int32_t)MICROPY_FLOAT_C_FUN(round)(_value)); + } + ndarray_set_value(dtype, ndarray->array, (*idx)++, value); +} + static mp_obj_t 
io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) { static const mp_arg_t allowed_args[] = { { MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = mp_const_none } }, @@ -418,26 +427,15 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw clipboard = clipboard_origin; #if ULAB_MAX_DIMS == 1 if(columns == cols[0]) { - mp_obj_t value = mp_parse_num_decimal(clipboard, len, false, false, NULL); - *array++ = mp_obj_get_float(value); + io_assign_value(clipboard, len, ndarray, &idx, dtype); } #else if(args[4].u_obj == mp_const_none) { - mp_obj_t value = mp_parse_num_decimal(clipboard, len, false, false, NULL); - if(dtype != NDARRAY_FLOAT) { - mp_float_t _value = mp_obj_get_float(value); - value = mp_obj_new_int((int32_t)MICROPY_FLOAT_C_FUN(round)(_value)); - } - ndarray_set_value(dtype, ndarray->array, idx++, value); + io_assign_value(clipboard, len, ndarray, &idx, dtype); } else { for(uint8_t c = 0; c < used_columns; c++) { if(columns == cols[c]) { - mp_obj_t value = mp_parse_num_decimal(clipboard, len, false, false, NULL); - if(dtype != NDARRAY_FLOAT) { - mp_float_t _value = mp_obj_get_float(value); - value = mp_obj_new_int((int32_t)MICROPY_FLOAT_C_FUN(round)(_value)); - } - ndarray_set_value(dtype, ndarray->array, idx++, value); + io_assign_value(clipboard, len, ndarray, &idx, dtype); break; } } From e8c89935abeee193428924fd58671f4db2a9a8ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zolt=C3=A1n=20V=C3=B6r=C3=B6s?= Date: Sun, 30 Jan 2022 19:55:59 +0100 Subject: [PATCH 3/7] fix array dtype conversion, linalg float constants --- code/ndarray.c | 2 +- code/numpy/linalg/linalg.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/code/ndarray.c b/code/ndarray.c index 3fc37612..d18d9f27 100644 --- a/code/ndarray.c +++ b/code/ndarray.c @@ -775,7 +775,7 @@ ndarray_obj_t *ndarray_copy_view_convert_type(ndarray_obj_t *source, uint8_t dty if((source->dtype == NDARRAY_FLOAT) && (dtype != NDARRAY_FLOAT)) { 
// floats must be treated separately, because they can't directly be converted to integer types mp_float_t f = ndarray_get_float_value(sarray, source->dtype); - item = mp_obj_new_int((int32_t)MICROPY_FLOAT_C_FUN(floor)(f)); + item = mp_obj_new_int((int32_t)MICROPY_FLOAT_C_FUN(round)(f)); } else { item = mp_binary_get_val_array(source->dtype, sarray, 0); } diff --git a/code/numpy/linalg/linalg.c b/code/numpy/linalg/linalg.c index d9343fb8..478503cf 100644 --- a/code/numpy/linalg/linalg.c +++ b/code/numpy/linalg/linalg.c @@ -435,22 +435,22 @@ static mp_obj_t linalg_qr(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_ // [[c s], // [s -c]] if(MICROPY_FLOAT_C_FUN(fabs)(rarray[i * n + j]) < LINALG_EPSILON) { // r[i, j] - c = (rarray[(i - 1) * n + j] >= 0.0) ? 1.0 : -1.0; // r[i-1, j] + c = (rarray[(i - 1) * n + j] >= MICROPY_FLOAT_CONST(0.0)) ? MICROPY_FLOAT_CONST(1.0) : MICROPY_FLOAT_CONST(-1.0); // r[i-1, j] s = 0.0; } else if(MICROPY_FLOAT_C_FUN(fabs)(rarray[(i - 1) * n + j]) < LINALG_EPSILON) { // r[i-1, j] c = 0.0; - s = (rarray[i * n + j] >= 0.0) ? -1.0 : 1.0; // r[i, j] + s = (rarray[i * n + j] >= MICROPY_FLOAT_CONST(0.0)) ? 
MICROPY_FLOAT_CONST(-1.0) : MICROPY_FLOAT_CONST(1.0); // r[i, j] } else { mp_float_t t, u; if(MICROPY_FLOAT_C_FUN(fabs)(rarray[(i - 1) * n + j]) > MICROPY_FLOAT_C_FUN(fabs)(rarray[i * n + j])) { // r[i-1, j], r[i, j] t = rarray[i * n + j] / rarray[(i - 1) * n + j]; // r[i, j]/r[i-1, j] u = MICROPY_FLOAT_C_FUN(sqrt)(1 + t * t); - c = -1.0 / u; + c = MICROPY_FLOAT_CONST(-1.0) / u; s = c * t; } else { t = rarray[(i - 1) * n + j] / rarray[i * n + j]; // r[i-1, j]/r[i, j] u = MICROPY_FLOAT_C_FUN(sqrt)(1 + t * t); - s = -1.0 / u; + s = MICROPY_FLOAT_CONST(-1.0) / u; c = s * t; } } From 65d6f8e947c65dd9bc3abd4ec33696b1148b7e07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zolt=C3=A1n=20V=C3=B6r=C3=B6s?= Date: Sun, 30 Jan 2022 22:47:44 +0100 Subject: [PATCH 4/7] fix buffer alignment error --- code/numpy/io/io.c | 59 ++++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 25 deletions(-) diff --git a/code/numpy/io/io.c b/code/numpy/io/io.c index e2046c06..418ef0e4 100644 --- a/code/numpy/io/io.c +++ b/code/numpy/io/io.c @@ -324,6 +324,7 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw char *offset; uint16_t rows = 0, items = 0, all_rows = 0; uint8_t read; + uint8_t len = 0; do { read = (uint8_t)stream_p->read(stream, buffer, ULAB_IO_BUFFER_SIZE - 1, &error); @@ -347,6 +348,7 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw rows++; all_rows++; items++; + len = 0; if(all_rows == max_rows) { break; } @@ -358,9 +360,13 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw while((*offset == ' ') || (*offset == '\t') || (*offset == '\v') || (*offset == '\f') || (*offset == '\r')) { offset++; } - items++; + if(len > 0) { + items++; + len = 0; + } } else { offset++; + len++; } } } while((read > 0) && (all_rows < max_rows)); @@ -394,10 +400,10 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw char *clipboard = m_new(char, 
ULAB_IO_CLIPBOARD_SIZE); char *clipboard_origin = clipboard; - uint8_t len = 0; rows = 0; columns = 0; + len = 0; size_t idx = 0; do { @@ -421,34 +427,37 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw if((*offset == ' ') || (*offset == '\t') || (*offset == '\v') || (*offset == '\f') || (*offset == '\r') || (*offset == '\n') || (*offset == delimiter)) { offset++; - while((*offset == ' ') || (*offset == '\t') || (*offset == '\v') || (*offset == '\f') || (*offset == '\r') || (*offset == '\n')) { + while((*offset == ' ') || (*offset == '\t') || (*offset == '\v') || + (*offset == '\f') || (*offset == '\r') || (*offset == '\n')) { offset++; } - clipboard = clipboard_origin; - #if ULAB_MAX_DIMS == 1 - if(columns == cols[0]) { - io_assign_value(clipboard, len, ndarray, &idx, dtype); - } - #else - if(args[4].u_obj == mp_const_none) { - io_assign_value(clipboard, len, ndarray, &idx, dtype); - } else { - for(uint8_t c = 0; c < used_columns; c++) { - if(columns == cols[c]) { - io_assign_value(clipboard, len, ndarray, &idx, dtype); - break; + if(len > 0) { + clipboard = clipboard_origin; + #if ULAB_MAX_DIMS == 1 + if(columns == cols[0]) { + io_assign_value(clipboard, len, ndarray, &idx, dtype); + } + #else + if(args[4].u_obj == mp_const_none) { + io_assign_value(clipboard, len, ndarray, &idx, dtype); + } else { + for(uint8_t c = 0; c < used_columns; c++) { + if(columns == cols[c]) { + io_assign_value(clipboard, len, ndarray, &idx, dtype); + break; + } } } - } - #endif - columns++; - len = 0; + #endif + columns++; + len = 0; - if(offset[-1] == '\n') { - columns = 0; - rows++; - if(rows == max_rows) { - break; + if(offset[-1] == '\n') { + columns = 0; + rows++; + if(rows == max_rows) { + break; + } } } } else { From f5f42c364349a4dae571fe091dd6c61654a6a7c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zolt=C3=A1n=20V=C3=B6r=C3=B6s?= Date: Mon, 31 Jan 2022 22:12:00 +0100 Subject: [PATCH 5/7] add skiprows keyword --- code/numpy/io/io.c | 54 
+++++++++++++++++++++-------------- tests/2d/numpy/loadtxt.py | 3 ++ tests/2d/numpy/loadtxt.py.exp | 6 ++++ 3 files changed, 42 insertions(+), 21 deletions(-) diff --git a/code/numpy/io/io.c b/code/numpy/io/io.c index 418ef0e4..9edbdce2 100644 --- a/code/numpy/io/io.c +++ b/code/numpy/io/io.c @@ -254,6 +254,7 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw { MP_QSTR_max_rows, MP_ARG_KW_ONLY | MP_ARG_INT, { .u_int = -1 } }, { MP_QSTR_usecols, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = mp_const_none } }, { MP_QSTR_dtype, MP_ARG_KW_ONLY | MP_ARG_INT, { .u_int = NDARRAY_FLOAT } }, + { MP_QSTR_skiprows, MP_ARG_KW_ONLY | MP_ARG_INT, { .u_int = 0 } }, }; mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)]; @@ -286,9 +287,10 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw comment_char = _comment_char[0]; } + uint16_t skiprows = args[6].u_int; uint16_t max_rows = ULAB_IO_MAX_ROWS; if((args[3].u_int > 0) && (args[3].u_int < ULAB_IO_MAX_ROWS)) { - max_rows = args[3].u_int; + max_rows = args[3].u_int + skiprows; } uint16_t *cols = NULL; @@ -345,10 +347,12 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw // catch whitespaces here: if these are not on a comment line, then they delimit a number if(*offset == '\n') { - rows++; all_rows++; - items++; - len = 0; + if(all_rows > skiprows) { + rows++; + items++; + len = 0; + } if(all_rows == max_rows) { break; } @@ -361,7 +365,9 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw offset++; } if(len > 0) { - items++; + if(all_rows >= skiprows) { + items++; + } len = 0; } } else { @@ -371,6 +377,9 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw } } while((read > 0) && (all_rows < max_rows)); + if(rows == 0) { + mp_raise_ValueError(translate("empty file")); + } uint16_t columns = items / rows; if(columns < used_columns) { @@ -424,6 +433,10 @@ static mp_obj_t 
io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw } } + if(rows == max_rows) { + break; + } + if((*offset == ' ') || (*offset == '\t') || (*offset == '\v') || (*offset == '\f') || (*offset == '\r') || (*offset == '\n') || (*offset == delimiter)) { offset++; @@ -433,31 +446,30 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw } if(len > 0) { clipboard = clipboard_origin; - #if ULAB_MAX_DIMS == 1 - if(columns == cols[0]) { - io_assign_value(clipboard, len, ndarray, &idx, dtype); - } - #else - if(args[4].u_obj == mp_const_none) { - io_assign_value(clipboard, len, ndarray, &idx, dtype); - } else { - for(uint8_t c = 0; c < used_columns; c++) { - if(columns == cols[c]) { - io_assign_value(clipboard, len, ndarray, &idx, dtype); - break; + if(rows >= skiprows) { + #if ULAB_MAX_DIMS == 1 + if(columns == cols[0]) { + io_assign_value(clipboard, len, ndarray, &idx, dtype); + } + #else + if(args[4].u_obj == mp_const_none) { + io_assign_value(clipboard, len, ndarray, &idx, dtype); + } else { + for(uint8_t c = 0; c < used_columns; c++) { + if(columns == cols[c]) { + io_assign_value(clipboard, len, ndarray, &idx, dtype); + break; + } } } + #endif } - #endif columns++; len = 0; if(offset[-1] == '\n') { columns = 0; rows++; - if(rows == max_rows) { - break; - } } } } else { diff --git a/tests/2d/numpy/loadtxt.py b/tests/2d/numpy/loadtxt.py index 26ddfe20..f08a9164 100644 --- a/tests/2d/numpy/loadtxt.py +++ b/tests/2d/numpy/loadtxt.py @@ -32,3 +32,6 @@ a = np.array(range(36)).reshape((9, 4)) np.savetxt('loadtxt.dat', a, header='9 data rows and a comment') print(np.loadtxt('loadtxt.dat', max_rows=5)) + +print() +print(np.loadtxt('loadtxt.dat', skiprows=5, dtype=np.uint16)) diff --git a/tests/2d/numpy/loadtxt.py.exp b/tests/2d/numpy/loadtxt.py.exp index 373312f4..588a97e9 100644 --- a/tests/2d/numpy/loadtxt.py.exp +++ b/tests/2d/numpy/loadtxt.py.exp @@ -29,3 +29,9 @@ array([[0.0, 1.0, 2.0, 3.0], [4.0, 5.0, 6.0, 7.0], [8.0, 9.0, 10.0, 
11.0], [12.0, 13.0, 14.0, 15.0]], dtype=float64) + +array([[16, 17, 18, 19], + [20, 21, 22, 23], + [24, 25, 26, 27], + [28, 29, 30, 31], + [32, 33, 34, 35]], dtype=uint16) From 9ba136acd43c9f504865c3b08a5e39394ceb83bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zolt=C3=A1n=20V=C3=B6r=C3=B6s?= Date: Mon, 31 Jan 2022 22:28:50 +0100 Subject: [PATCH 6/7] fix savetxt comments default value --- code/numpy/io/io.c | 4 ++-- tests/2d/numpy/savetxt.py.exp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/code/numpy/io/io.c b/code/numpy/io/io.c index 9edbdce2..8f8840ad 100644 --- a/code/numpy/io/io.c +++ b/code/numpy/io/io.c @@ -751,7 +751,7 @@ static mp_obj_t io_savetxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw const char *comments = mp_obj_str_get_data(args[5].u_obj, &_len); stream_p->write(stream, comments, _len, &error); } else { - stream_p->write(stream, "#", 1, &error); + stream_p->write(stream, "# ", 2, &error); } const char *header = mp_obj_str_get_data(args[3].u_obj, &_len); stream_p->write(stream, header, _len, &error); @@ -799,7 +799,7 @@ static mp_obj_t io_savetxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw const char *comments = mp_obj_str_get_data(args[5].u_obj, &_len); stream_p->write(stream, comments, _len, &error); } else { - stream_p->write(stream, "#", 1, &error); + stream_p->write(stream, "# ", 2, &error); } const char *footer = mp_obj_str_get_data(args[4].u_obj, &_len); stream_p->write(stream, footer, _len, &error); diff --git a/tests/2d/numpy/savetxt.py.exp b/tests/2d/numpy/savetxt.py.exp index d41b4f0e..22cdd211 100644 --- a/tests/2d/numpy/savetxt.py.exp +++ b/tests/2d/numpy/savetxt.py.exp @@ -20,7 +20,7 @@ savetxt with delimiter 6.000000000000000,7.000000000000000,8.000000000000000 savetxt with header -#column1 column2 column3 +# column1 column2 column3 0.000000000000000 1.000000000000000 2.000000000000000 3.000000000000000 4.000000000000000 5.000000000000000 6.000000000000000 7.000000000000000 8.000000000000000 
@@ -29,5 +29,5 @@ savetxt with footer 0.000000000000000 1.000000000000000 2.000000000000000 3.000000000000000 4.000000000000000 5.000000000000000 6.000000000000000 7.000000000000000 8.000000000000000 -#written data file +# written data file From 6dcad4424c37713eb400c90e1f0ec73460b2003e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zolt=C3=A1n=20V=C3=B6r=C3=B6s?= Date: Tue, 1 Feb 2022 18:42:58 +0100 Subject: [PATCH 7/7] extend loadtxt documentation --- code/ulab.c | 2 +- docs/manual/source/conf.py | 2 +- docs/manual/source/numpy-functions.rst | 31 +++++++++++++---- docs/numpy-functions.ipynb | 47 +++++++++++++++++++------- docs/ulab-change-log.md | 6 ++++ docs/ulab-convert.ipynb | 14 ++++---- 6 files changed, 73 insertions(+), 29 deletions(-) diff --git a/code/ulab.c b/code/ulab.c index 483063d1..1ca10e3f 100644 --- a/code/ulab.c +++ b/code/ulab.c @@ -33,7 +33,7 @@ #include "user/user.h" #include "utils/utils.h" -#define ULAB_VERSION 4.4.0 +#define ULAB_VERSION 4.4.2 #define xstr(s) str(s) #define str(s) #s diff --git a/docs/manual/source/conf.py b/docs/manual/source/conf.py index 80344b5d..c7b0e4b6 100644 --- a/docs/manual/source/conf.py +++ b/docs/manual/source/conf.py @@ -27,7 +27,7 @@ author = 'Zoltán Vörös' # The full version, including alpha/beta/rc tags -release = '4.4.1' +release = '4.4.2' # -- General configuration --------------------------------------------------- diff --git a/docs/manual/source/numpy-functions.rst b/docs/manual/source/numpy-functions.rst index 01660cc0..5894bea4 100644 --- a/docs/manual/source/numpy-functions.rst +++ b/docs/manual/source/numpy-functions.rst @@ -1027,11 +1027,16 @@ https://docs.scipy.org/doc/numpy/reference/generated/numpy.loadtxt.html The function reads data from a text file, and returns the generated array. 
It takes a file name as the single positional argument, and the -``dtype`` (with a default value of ``float``), the ``comments`` (with a -default value of ``#``), the ``delimiter`` (with a default value of -``,``), ``usecols`` (with a default of all columns), and the -``max_rows`` (with a default of all rows) keyword arguments. If -``dtype`` is supplied and is not ``float``, the data entries will be +following keyword arguments: + +1. ``comments='#'`` +2. ``dtype=float`` +3. ``delimiter=','`` +4. ``max_rows`` (with a default of all rows) +5. ``skiprows=0`` +6. ``usecols`` (with a default of all columns) + +If ``dtype`` is supplied and is not ``float``, the data entries will be converted to the appropriate integer type by rounding the values. .. code:: @@ -1046,8 +1051,11 @@ converted to the appropriate integer type by rounding the values. print('\nread maximum 5 rows (first row is a comment line)') print(np.loadtxt('loadtxt.dat', max_rows=5)) - print('\nread maximum 5 rows, convert dtype') + print('\nread maximum 5 rows, convert dtype (first row is a comment line)') print(np.loadtxt('loadtxt.dat', max_rows=5, dtype=np.uint8)) + + print('\nskip the first 3 rows, convert dtype (first row is a comment line)') + print(np.loadtxt('loadtxt.dat', skiprows=3, dtype=np.uint8)) .. parsed-literal:: @@ -1068,12 +1076,21 @@ converted to the appropriate integer type by rounding the values. 
[8.0, 9.0, 10.0, 11.0], [12.0, 13.0, 14.0, 15.0]], dtype=float64) - read maximum 5 rows, convert dtype + read maximum 5 rows, convert dtype (first row is a comment line) array([[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]], dtype=uint8) + skip the first 3 rows, convert dtype (first row is a comment line) + array([[8, 9, 10, 11], + [12, 13, 14, 15], + [16, 17, 18, 19], + [20, 21, 22, 23], + [24, 25, 26, 27], + [28, 29, 30, 31], + [32, 33, 34, 35]], dtype=uint8) + diff --git a/docs/numpy-functions.ipynb b/docs/numpy-functions.ipynb index f7383c84..4c2d009f 100644 --- a/docs/numpy-functions.ipynb +++ b/docs/numpy-functions.ipynb @@ -31,11 +31,11 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": { "ExecuteTime": { - "end_time": "2022-01-29T21:24:54.931042Z", - "start_time": "2022-01-29T21:24:54.927243Z" + "end_time": "2022-02-01T17:37:25.505687Z", + "start_time": "2022-02-01T17:37:25.493850Z" } }, "outputs": [], @@ -49,11 +49,11 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "metadata": { "ExecuteTime": { - "end_time": "2022-01-29T21:24:55.649634Z", - "start_time": "2022-01-29T21:24:55.626921Z" + "end_time": "2022-02-01T17:37:25.717714Z", + "start_time": "2022-02-01T17:37:25.532299Z" } }, "outputs": [], @@ -1474,16 +1474,25 @@ "\n", "`numpy`: https://docs.scipy.org/doc/numpy/reference/generated/numpy.loadtxt.html\n", "\n", - "The function reads data from a text file, and returns the generated array. It takes a file name as the single positional argument, and the `dtype` (with a default value of `float`), the `comments` (with a default value of `#`), the `delimiter` (with a default value of `,`), `usecols` (with a default of all columns), and the `max_rows` (with a default of all rows) keyword arguments. If `dtype` is supplied and is not `float`, the data entries will be converted to the appropriate integer type by rounding the values." 
+ "The function reads data from a text file, and returns the generated array. It takes a file name as the single positional argument, and the following keyword arguments:\n", + "\n", + "1. `comments='#'`\n", + "1. `dtype=float`\n", + "1. `delimiter=','`\n", + "1. `max_rows` (with a default of all rows) \n", + "1. `skiprows=0`\n", + "1. `usecols` (with a default of all columns)\n", + "\n", + "If `dtype` is supplied and is not `float`, the data entries will be converted to the appropriate integer type by rounding the values." ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "metadata": { "ExecuteTime": { - "end_time": "2022-01-29T21:26:36.258135Z", - "start_time": "2022-01-29T21:26:36.236256Z" + "end_time": "2022-02-01T17:41:22.384706Z", + "start_time": "2022-02-01T17:41:22.362821Z" } }, "outputs": [ @@ -1508,12 +1517,21 @@ " [8.0, 9.0, 10.0, 11.0],\n", " [12.0, 13.0, 14.0, 15.0]], dtype=float64)\n", "\n", - "read maximum 5 rows, convert dtype\n", + "read maximum 5 rows, convert dtype (first row is a comment line)\n", "array([[0, 1, 2, 3],\n", " [4, 5, 6, 7],\n", " [8, 9, 10, 11],\n", " [12, 13, 14, 15]], dtype=uint8)\n", "\n", + "skip the first 3 rows, convert dtype (first row is a comment line)\n", + "array([[8, 9, 10, 11],\n", + " [12, 13, 14, 15],\n", + " [16, 17, 18, 19],\n", + " [20, 21, 22, 23],\n", + " [24, 25, 26, 27],\n", + " [28, 29, 30, 31],\n", + " [32, 33, 34, 35]], dtype=uint8)\n", + "\n", "\n" ] } @@ -1529,8 +1547,11 @@ "print('\\nread maximum 5 rows (first row is a comment line)')\n", "print(np.loadtxt('loadtxt.dat', max_rows=5))\n", "\n", - "print('\\nread maximum 5 rows, convert dtype')\n", - "print(np.loadtxt('loadtxt.dat', max_rows=5, dtype=np.uint8))" + "print('\\nread maximum 5 rows, convert dtype (first row is a comment line)')\n", + "print(np.loadtxt('loadtxt.dat', max_rows=5, dtype=np.uint8))\n", + "\n", + "print('\\nskip the first 3 rows, convert dtype (first row is a comment line)')\n", +
"print(np.loadtxt('loadtxt.dat', skiprows=3, dtype=np.uint8))" ] }, { diff --git a/docs/ulab-change-log.md b/docs/ulab-change-log.md index 981acb95..079a6ff6 100644 --- a/docs/ulab-change-log.md +++ b/docs/ulab-change-log.md @@ -1,3 +1,9 @@ +Tue, 1 Feb 2022 + +version 4.4.2 + + add skiprows keyword to loadtxt + Sat, 29 Jan 2022 version 4.4.1 diff --git a/docs/ulab-convert.ipynb b/docs/ulab-convert.ipynb index 1c6bcb27..62ff596a 100644 --- a/docs/ulab-convert.ipynb +++ b/docs/ulab-convert.ipynb @@ -17,8 +17,8 @@ "execution_count": 1, "metadata": { "ExecuteTime": { - "end_time": "2022-01-29T21:27:54.988801Z", - "start_time": "2022-01-29T21:27:54.980856Z" + "end_time": "2022-02-01T17:41:38.040350Z", + "start_time": "2022-02-01T17:41:38.023988Z" } }, "outputs": [ @@ -61,7 +61,7 @@ "author = 'Zoltán Vörös'\n", "\n", "# The full version, including alpha/beta/rc tags\n", - "release = '4.4.1'\n", + "release = '4.4.2'\n", "\n", "\n", "# -- General configuration ---------------------------------------------------\n", @@ -218,8 +218,8 @@ "execution_count": 2, "metadata": { "ExecuteTime": { - "end_time": "2022-01-29T21:27:59.573556Z", - "start_time": "2022-01-29T21:27:57.323819Z" + "end_time": "2022-02-01T17:41:42.215395Z", + "start_time": "2022-02-01T17:41:40.650763Z" } }, "outputs": [], @@ -259,8 +259,8 @@ "execution_count": 3, "metadata": { "ExecuteTime": { - "end_time": "2022-01-29T21:28:16.742315Z", - "start_time": "2022-01-29T21:28:11.284954Z" + "end_time": "2022-02-01T17:42:04.318049Z", + "start_time": "2022-02-01T17:41:59.671788Z" } }, "outputs": [],