Skip to content

Commit 88231bc

Browse files
authored
Merge pull request v923z#500 from v923z/loadtxt-fix
add dtype, and skiprows keywords to loadtxt
2 parents 9dc9b77 + 6dcad44 commit 88231bc

File tree

12 files changed

+213
-72
lines changed

12 files changed

+213
-72
lines changed

code/ndarray.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -775,7 +775,7 @@ ndarray_obj_t *ndarray_copy_view_convert_type(ndarray_obj_t *source, uint8_t dty
775775
if((source->dtype == NDARRAY_FLOAT) && (dtype != NDARRAY_FLOAT)) {
776776
// floats must be treated separately, because they can't directly be converted to integer types
777777
mp_float_t f = ndarray_get_float_value(sarray, source->dtype);
778-
item = mp_obj_new_int((int32_t)MICROPY_FLOAT_C_FUN(floor)(f));
778+
item = mp_obj_new_int((int32_t)MICROPY_FLOAT_C_FUN(round)(f));
779779
} else {
780780
item = mp_binary_get_val_array(source->dtype, sarray, 0);
781781
}

code/numpy/io/io.c

Lines changed: 67 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
* Copyright (c) 2022 Zoltán Vörös
99
*/
1010

11+
#include <math.h>
1112
#include <string.h>
1213

1314
#include "py/builtin.h"
@@ -236,13 +237,24 @@ MP_DEFINE_CONST_FUN_OBJ_1(io_load_obj, io_load);
236237
#endif /* ULAB_NUMPY_HAS_LOAD */
237238

238239
#if ULAB_NUMPY_HAS_LOADTXT
240+
/*
 * Convert one text token into a number and store it in the output array.
 *
 * clipboard: start of the token text
 * len:       token length passed on to the parser
 * ndarray:   destination array; values are written into ndarray->array
 * idx:       in/out write position; post-incremented after the store
 * dtype:     target dtype; anything other than NDARRAY_FLOAT is rounded to an integer
 *
 * NOTE(review): presumably mp_parse_num_decimal reads exactly `len` characters
 * of `clipboard` and raises on malformed input -- confirm against its definition.
 */
static void io_assign_value(const char *clipboard, uint8_t len, ndarray_obj_t *ndarray, size_t *idx, uint8_t dtype) {
241+
mp_obj_t value = mp_parse_num_decimal(clipboard, len, false, false, NULL);
242+
if(dtype != NDARRAY_FLOAT) {
243+
// non-float targets: take the parsed value as a float, round it, and re-box it as an integer
mp_float_t _value = mp_obj_get_float(value);
244+
// the rounded result is cast to int32_t before the integer object is created
value = mp_obj_new_int((int32_t)MICROPY_FLOAT_C_FUN(round)(_value));
245+
}
246+
// store at the current position, then advance the caller's index
ndarray_set_value(dtype, ndarray->array, (*idx)++, value);
247+
}
248+
239249
static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
240250
static const mp_arg_t allowed_args[] = {
241251
{ MP_QSTR_, MP_ARG_REQUIRED | MP_ARG_OBJ, { .u_rom_obj = mp_const_none } },
242252
{ MP_QSTR_delimiter, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = mp_const_none } },
243253
{ MP_QSTR_comments, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = mp_const_none } },
244254
{ MP_QSTR_max_rows, MP_ARG_KW_ONLY | MP_ARG_INT, { .u_int = -1 } },
245255
{ MP_QSTR_usecols, MP_ARG_KW_ONLY | MP_ARG_OBJ, { .u_rom_obj = mp_const_none } },
256+
{ MP_QSTR_dtype, MP_ARG_KW_ONLY | MP_ARG_INT, { .u_int = NDARRAY_FLOAT } },
257+
{ MP_QSTR_skiprows, MP_ARG_KW_ONLY | MP_ARG_INT, { .u_int = 0 } },
246258
};
247259

248260
mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
@@ -275,9 +287,10 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
275287
comment_char = _comment_char[0];
276288
}
277289

290+
uint16_t skiprows = args[6].u_int;
278291
uint16_t max_rows = ULAB_IO_MAX_ROWS;
279292
if((args[3].u_int > 0) && (args[3].u_int < ULAB_IO_MAX_ROWS)) {
280-
max_rows = args[3].u_int;
293+
max_rows = args[3].u_int + skiprows;
281294
}
282295

283296
uint16_t *cols = NULL;
@@ -304,13 +317,16 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
304317
}
305318
}
306319

320+
uint8_t dtype = args[5].u_int;
321+
307322
// count the columns and rows
308323
// we actually count only the rows and the items, and assume that
309324
// the number of columns can be gotten by means of a simple division,
310325
// i.e., that each row has the same number of columns
311326
char *offset;
312327
uint16_t rows = 0, items = 0, all_rows = 0;
313328
uint8_t read;
329+
uint8_t len = 0;
314330

315331
do {
316332
read = (uint8_t)stream_p->read(stream, buffer, ULAB_IO_BUFFER_SIZE - 1, &error);
@@ -331,9 +347,12 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
331347

332348
// catch whitespaces here: if these are not on a comment line, then they delimit a number
333349
if(*offset == '\n') {
334-
rows++;
335350
all_rows++;
336-
items++;
351+
if(all_rows > skiprows) {
352+
rows++;
353+
items++;
354+
len = 0;
355+
}
337356
if(all_rows == max_rows) {
338357
break;
339358
}
@@ -345,13 +364,22 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
345364
while((*offset == ' ') || (*offset == '\t') || (*offset == '\v') || (*offset == '\f') || (*offset == '\r')) {
346365
offset++;
347366
}
348-
items++;
367+
if(len > 0) {
368+
if(all_rows >= skiprows) {
369+
items++;
370+
}
371+
len = 0;
372+
}
349373
} else {
350374
offset++;
375+
len++;
351376
}
352377
}
353378
} while((read > 0) && (all_rows < max_rows));
354379

380+
if(rows == 0) {
381+
mp_raise_ValueError(translate("empty file"));
382+
}
355383
uint16_t columns = items / rows;
356384

357385
if(columns < used_columns) {
@@ -363,31 +391,30 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
363391

364392
#if ULAB_MAX_DIMS == 1
365393
shape[0] = rows;
366-
ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(1, shape, NDARRAY_FLOAT);
394+
ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(1, shape, dtype);
367395
#else
368396
if(args[4].u_obj == mp_const_none) {
369397
shape[ULAB_MAX_DIMS - 1] = columns;
370398
} else {
371399
shape[ULAB_MAX_DIMS - 1] = used_columns;
372400
}
373401
shape[ULAB_MAX_DIMS - 2] = rows;
374-
ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(2, shape, NDARRAY_FLOAT);
402+
ndarray_obj_t *ndarray = ndarray_new_dense_ndarray(2, shape, dtype);
375403
#endif
376404

377-
mp_float_t *array = (mp_float_t *)ndarray->array;
378-
379405
struct mp_stream_seek_t seek_s;
380406
seek_s.offset = 0;
381407
seek_s.whence = MP_SEEK_SET;
382408
stream_p->ioctl(stream, MP_STREAM_SEEK, (mp_uint_t)(uintptr_t)&seek_s, &error);
383409

384410
char *clipboard = m_new(char, ULAB_IO_CLIPBOARD_SIZE);
385411
char *clipboard_origin = clipboard;
386-
uint8_t len = 0;
387412

388413
rows = 0;
389414
columns = 0;
415+
len = 0;
390416

417+
size_t idx = 0;
391418
do {
392419
read = stream_p->read(stream, buffer, ULAB_IO_BUFFER_SIZE - 1, &error);
393420
buffer[read] = '\0';
@@ -406,40 +433,43 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
406433
}
407434
}
408435

436+
if(rows == max_rows) {
437+
break;
438+
}
439+
409440
if((*offset == ' ') || (*offset == '\t') || (*offset == '\v') ||
410441
(*offset == '\f') || (*offset == '\r') || (*offset == '\n') || (*offset == delimiter)) {
411442
offset++;
412-
while((*offset == ' ') || (*offset == '\t') || (*offset == '\v') || (*offset == '\f') || (*offset == '\r') || (*offset == '\n')) {
443+
while((*offset == ' ') || (*offset == '\t') || (*offset == '\v') ||
444+
(*offset == '\f') || (*offset == '\r') || (*offset == '\n')) {
413445
offset++;
414446
}
415-
clipboard = clipboard_origin;
416-
#if ULAB_MAX_DIMS == 1
417-
if(columns == cols[0]) {
418-
mp_obj_t value = mp_parse_num_decimal(clipboard, len, false, false, NULL);
419-
*array++ = mp_obj_get_float(value);
420-
}
421-
#else
422-
if(args[4].u_obj == mp_const_none) {
423-
mp_obj_t value = mp_parse_num_decimal(clipboard, len, false, false, NULL);
424-
*array++ = mp_obj_get_float(value);
425-
} else {
426-
for(uint8_t c = 0; c < used_columns; c++) {
427-
if(columns == cols[c]) {
428-
mp_obj_t value = mp_parse_num_decimal(clipboard, len, false, false, NULL);
429-
*array++ = mp_obj_get_float(value);
430-
break;
447+
if(len > 0) {
448+
clipboard = clipboard_origin;
449+
if(rows >= skiprows) {
450+
#if ULAB_MAX_DIMS == 1
451+
if(columns == cols[0]) {
452+
io_assign_value(clipboard, len, ndarray, &idx, dtype);
431453
}
454+
#else
455+
if(args[4].u_obj == mp_const_none) {
456+
io_assign_value(clipboard, len, ndarray, &idx, dtype);
457+
} else {
458+
for(uint8_t c = 0; c < used_columns; c++) {
459+
if(columns == cols[c]) {
460+
io_assign_value(clipboard, len, ndarray, &idx, dtype);
461+
break;
462+
}
463+
}
464+
}
465+
#endif
432466
}
433-
}
434-
#endif
435-
columns++;
436-
len = 0;
467+
columns++;
468+
len = 0;
437469

438-
if(offset[-1] == '\n') {
439-
columns = 0;
440-
rows++;
441-
if(rows == max_rows) {
442-
break;
470+
if(offset[-1] == '\n') {
471+
columns = 0;
472+
rows++;
443473
}
444474
}
445475
} else {
@@ -721,7 +751,7 @@ static mp_obj_t io_savetxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
721751
const char *comments = mp_obj_str_get_data(args[5].u_obj, &_len);
722752
stream_p->write(stream, comments, _len, &error);
723753
} else {
724-
stream_p->write(stream, "#", 1, &error);
754+
stream_p->write(stream, "# ", 2, &error);
725755
}
726756
const char *header = mp_obj_str_get_data(args[3].u_obj, &_len);
727757
stream_p->write(stream, header, _len, &error);
@@ -769,7 +799,7 @@ static mp_obj_t io_savetxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
769799
const char *comments = mp_obj_str_get_data(args[5].u_obj, &_len);
770800
stream_p->write(stream, comments, _len, &error);
771801
} else {
772-
stream_p->write(stream, "#", 1, &error);
802+
stream_p->write(stream, "# ", 2, &error);
773803
}
774804
const char *footer = mp_obj_str_get_data(args[4].u_obj, &_len);
775805
stream_p->write(stream, footer, _len, &error);

code/numpy/linalg/linalg.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -435,22 +435,22 @@ static mp_obj_t linalg_qr(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_
435435
// [[c s],
436436
// [s -c]]
437437
if(MICROPY_FLOAT_C_FUN(fabs)(rarray[i * n + j]) < LINALG_EPSILON) { // r[i, j]
438-
c = (rarray[(i - 1) * n + j] >= 0.0) ? 1.0 : -1.0; // r[i-1, j]
438+
c = (rarray[(i - 1) * n + j] >= MICROPY_FLOAT_CONST(0.0)) ? MICROPY_FLOAT_CONST(1.0) : MICROPY_FLOAT_CONST(-1.0); // r[i-1, j]
439439
s = 0.0;
440440
} else if(MICROPY_FLOAT_C_FUN(fabs)(rarray[(i - 1) * n + j]) < LINALG_EPSILON) { // r[i-1, j]
441441
c = 0.0;
442-
s = (rarray[i * n + j] >= 0.0) ? -1.0 : 1.0; // r[i, j]
442+
s = (rarray[i * n + j] >= MICROPY_FLOAT_CONST(0.0)) ? MICROPY_FLOAT_CONST(-1.0) : MICROPY_FLOAT_CONST(1.0); // r[i, j]
443443
} else {
444444
mp_float_t t, u;
445445
if(MICROPY_FLOAT_C_FUN(fabs)(rarray[(i - 1) * n + j]) > MICROPY_FLOAT_C_FUN(fabs)(rarray[i * n + j])) { // r[i-1, j], r[i, j]
446446
t = rarray[i * n + j] / rarray[(i - 1) * n + j]; // r[i, j]/r[i-1, j]
447447
u = MICROPY_FLOAT_C_FUN(sqrt)(1 + t * t);
448-
c = -1.0 / u;
448+
c = MICROPY_FLOAT_CONST(-1.0) / u;
449449
s = c * t;
450450
} else {
451451
t = rarray[(i - 1) * n + j] / rarray[i * n + j]; // r[i-1, j]/r[i, j]
452452
u = MICROPY_FLOAT_C_FUN(sqrt)(1 + t * t);
453-
s = -1.0 / u;
453+
s = MICROPY_FLOAT_CONST(-1.0) / u;
454454
c = s * t;
455455
}
456456
}

code/ulab.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
#include "user/user.h"
3434
#include "utils/utils.h"
3535

36-
#define ULAB_VERSION 4.4.0
36+
#define ULAB_VERSION 4.4.2
3737
#define xstr(s) str(s)
3838
#define str(s) #s
3939

docs/manual/source/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
author = 'Zoltán Vörös'
2828

2929
# The full version, including alpha/beta/rc tags
30-
release = '4.4.0'
30+
release = '4.4.2'
3131

3232

3333
# -- General configuration ---------------------------------------------------

docs/manual/source/numpy-functions.rst

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1027,10 +1027,17 @@ https://docs.scipy.org/doc/numpy/reference/generated/numpy.loadtxt.html
10271027

10281028
The function reads data from a text file, and returns the generated
10291029
array. It takes a file name as the single positional argument, and the
1030-
``comments`` (with a default value of ``#``), the ``delimiter`` (with a
1031-
default value of ``,``), ``usecols`` (with a default of all columns),
1032-
and ``max_rows`` (with a default of all rows) keyword arguments. The
1033-
array returned is always of type ``float``.
1030+
following keyword arguments:
1031+
1032+
1. ``comments='#'``
1033+
2. ``dtype=float``
1034+
3. ``delimiter=','``
1035+
4. ``max_rows`` (with a default of all rows)
1036+
5. ``skiprows=0``
1037+
6. ``usecols`` (with a default of all columns)
1038+
1039+
If ``dtype`` is supplied and is not ``float``, the data entries will be
1040+
converted to the appropriate integer type by rounding the values.
10341041

10351042
.. code::
10361043
@@ -1040,8 +1047,15 @@ array returned is always of type ``float``.
10401047
10411048
print('read all data')
10421049
print(np.loadtxt('loadtxt.dat'))
1050+
10431051
print('\nread maximum 5 rows (first row is a comment line)')
10441052
print(np.loadtxt('loadtxt.dat', max_rows=5))
1053+
1054+
print('\nread maximum 5 rows, convert dtype (first row is a comment line)')
1055+
print(np.loadtxt('loadtxt.dat', max_rows=5, dtype=np.uint8))
1056+
1057+
print('\nskip the first 3 rows, convert dtype (first row is a comment line)')
1058+
print(np.loadtxt('loadtxt.dat', skiprows=3, dtype=np.uint8))
10451059
10461060
.. parsed-literal::
10471061
@@ -1052,14 +1066,31 @@ array returned is always of type ``float``.
10521066
[12.0, 13.0, 14.0, 15.0],
10531067
[16.0, 17.0, 18.0, 19.0],
10541068
[20.0, 21.0, 22.0, 23.0],
1055-
[24.0, 25.0, 26.0, 27.0]], dtype=float64)
1069+
[24.0, 25.0, 26.0, 27.0],
1070+
[28.00000000000001, 29.0, 30.0, 31.0],
1071+
[32.0, 33.0, 34.00000000000001, 35.0]], dtype=float64)
10561072
10571073
read maximum 5 rows (first row is a comment line)
10581074
array([[0.0, 1.0, 2.0, 3.0],
10591075
[4.0, 5.0, 6.0, 7.0],
10601076
[8.0, 9.0, 10.0, 11.0],
10611077
[12.0, 13.0, 14.0, 15.0]], dtype=float64)
10621078
1079+
read maximum 5 rows, convert dtype (first row is a comment line)
1080+
array([[0, 1, 2, 3],
1081+
[4, 5, 6, 7],
1082+
[8, 9, 10, 11],
1083+
[12, 13, 14, 15]], dtype=uint8)
1084+
1085+
skip the first 3 rows, convert dtype (first row is a comment line)
1086+
array([[8, 9, 10, 11],
1087+
[12, 13, 14, 15],
1088+
[16, 17, 18, 19],
1089+
[20, 21, 22, 23],
1090+
[24, 25, 26, 27],
1091+
[28, 29, 30, 31],
1092+
[32, 33, 34, 35]], dtype=uint8)
1093+
10631094
10641095
10651096

0 commit comments

Comments
 (0)