88 * Copyright (c) 2022 Zoltán Vörös
99*/
1010
11+ #include <math.h>
1112#include <string.h>
1213
1314#include "py/builtin.h"
@@ -236,13 +237,24 @@ MP_DEFINE_CONST_FUN_OBJ_1(io_load_obj, io_load);
236237#endif /* ULAB_NUMPY_HAS_LOAD */
237238
238239#if ULAB_NUMPY_HAS_LOADTXT
240+ static void io_assign_value (const char * clipboard , uint8_t len , ndarray_obj_t * ndarray , size_t * idx , uint8_t dtype ) {
241+ mp_obj_t value = mp_parse_num_decimal (clipboard , len , false, false, NULL );
242+ if (dtype != NDARRAY_FLOAT ) {
243+ mp_float_t _value = mp_obj_get_float (value );
244+ value = mp_obj_new_int ((int32_t )MICROPY_FLOAT_C_FUN (round )(_value ));
245+ }
246+ ndarray_set_value (dtype , ndarray -> array , (* idx )++ , value );
247+ }
248+
239249static mp_obj_t io_loadtxt (size_t n_args , const mp_obj_t * pos_args , mp_map_t * kw_args ) {
240250 static const mp_arg_t allowed_args [] = {
241251 { MP_QSTR_ , MP_ARG_REQUIRED | MP_ARG_OBJ , { .u_rom_obj = mp_const_none } },
242252 { MP_QSTR_delimiter , MP_ARG_KW_ONLY | MP_ARG_OBJ , { .u_rom_obj = mp_const_none } },
243253 { MP_QSTR_comments , MP_ARG_KW_ONLY | MP_ARG_OBJ , { .u_rom_obj = mp_const_none } },
244254 { MP_QSTR_max_rows , MP_ARG_KW_ONLY | MP_ARG_INT , { .u_int = -1 } },
245255 { MP_QSTR_usecols , MP_ARG_KW_ONLY | MP_ARG_OBJ , { .u_rom_obj = mp_const_none } },
256+ { MP_QSTR_dtype , MP_ARG_KW_ONLY | MP_ARG_INT , { .u_int = NDARRAY_FLOAT } },
257+ { MP_QSTR_skiprows , MP_ARG_KW_ONLY | MP_ARG_INT , { .u_int = 0 } },
246258 };
247259
248260 mp_arg_val_t args [MP_ARRAY_SIZE (allowed_args )];
@@ -275,9 +287,10 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
275287 comment_char = _comment_char [0 ];
276288 }
277289
290+ uint16_t skiprows = args [6 ].u_int ;
278291 uint16_t max_rows = ULAB_IO_MAX_ROWS ;
279292 if ((args [3 ].u_int > 0 ) && (args [3 ].u_int < ULAB_IO_MAX_ROWS )) {
280- max_rows = args [3 ].u_int ;
293+ max_rows = args [3 ].u_int + skiprows ;
281294 }
282295
283296 uint16_t * cols = NULL ;
@@ -304,13 +317,16 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
304317 }
305318 }
306319
320+ uint8_t dtype = args [5 ].u_int ;
321+
307322 // count the columns and rows
308323 // we actually count only the rows and the items, and assume that
309324 // the number of columns can be gotten by means of a simple division,
310325 // i.e., that each row has the same number of columns
311326 char * offset ;
312327 uint16_t rows = 0 , items = 0 , all_rows = 0 ;
313328 uint8_t read ;
329+ uint8_t len = 0 ;
314330
315331 do {
316332 read = (uint8_t )stream_p -> read (stream , buffer , ULAB_IO_BUFFER_SIZE - 1 , & error );
@@ -331,9 +347,12 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
331347
332348 // catch whitespaces here: if these are not on a comment line, then they delimit a number
333349 if (* offset == '\n' ) {
334- rows ++ ;
335350 all_rows ++ ;
336- items ++ ;
351+ if (all_rows > skiprows ) {
352+ rows ++ ;
353+ items ++ ;
354+ len = 0 ;
355+ }
337356 if (all_rows == max_rows ) {
338357 break ;
339358 }
@@ -345,13 +364,22 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
345364 while ((* offset == ' ' ) || (* offset == '\t' ) || (* offset == '\v' ) || (* offset == '\f' ) || (* offset == '\r' )) {
346365 offset ++ ;
347366 }
348- items ++ ;
367+ if (len > 0 ) {
368+ if (all_rows >= skiprows ) {
369+ items ++ ;
370+ }
371+ len = 0 ;
372+ }
349373 } else {
350374 offset ++ ;
375+ len ++ ;
351376 }
352377 }
353378 } while ((read > 0 ) && (all_rows < max_rows ));
354379
380+ if (rows == 0 ) {
381+ mp_raise_ValueError (translate ("empty file" ));
382+ }
355383 uint16_t columns = items / rows ;
356384
357385 if (columns < used_columns ) {
@@ -363,31 +391,30 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
363391
364392 #if ULAB_MAX_DIMS == 1
365393 shape [0 ] = rows ;
366- ndarray_obj_t * ndarray = ndarray_new_dense_ndarray (1 , shape , NDARRAY_FLOAT );
394+ ndarray_obj_t * ndarray = ndarray_new_dense_ndarray (1 , shape , dtype );
367395 #else
368396 if (args [4 ].u_obj == mp_const_none ) {
369397 shape [ULAB_MAX_DIMS - 1 ] = columns ;
370398 } else {
371399 shape [ULAB_MAX_DIMS - 1 ] = used_columns ;
372400 }
373401 shape [ULAB_MAX_DIMS - 2 ] = rows ;
374- ndarray_obj_t * ndarray = ndarray_new_dense_ndarray (2 , shape , NDARRAY_FLOAT );
402+ ndarray_obj_t * ndarray = ndarray_new_dense_ndarray (2 , shape , dtype );
375403 #endif
376404
377- mp_float_t * array = (mp_float_t * )ndarray -> array ;
378-
379405 struct mp_stream_seek_t seek_s ;
380406 seek_s .offset = 0 ;
381407 seek_s .whence = MP_SEEK_SET ;
382408 stream_p -> ioctl (stream , MP_STREAM_SEEK , (mp_uint_t )(uintptr_t )& seek_s , & error );
383409
384410 char * clipboard = m_new (char , ULAB_IO_CLIPBOARD_SIZE );
385411 char * clipboard_origin = clipboard ;
386- uint8_t len = 0 ;
387412
388413 rows = 0 ;
389414 columns = 0 ;
415+ len = 0 ;
390416
417+ size_t idx = 0 ;
391418 do {
392419 read = stream_p -> read (stream , buffer , ULAB_IO_BUFFER_SIZE - 1 , & error );
393420 buffer [read ] = '\0' ;
@@ -406,40 +433,43 @@ static mp_obj_t io_loadtxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
406433 }
407434 }
408435
436+ if (rows == max_rows ) {
437+ break ;
438+ }
439+
409440 if ((* offset == ' ' ) || (* offset == '\t' ) || (* offset == '\v' ) ||
410441 (* offset == '\f' ) || (* offset == '\r' ) || (* offset == '\n' ) || (* offset == delimiter )) {
411442 offset ++ ;
412- while ((* offset == ' ' ) || (* offset == '\t' ) || (* offset == '\v' ) || (* offset == '\f' ) || (* offset == '\r' ) || (* offset == '\n' )) {
443+ while ((* offset == ' ' ) || (* offset == '\t' ) || (* offset == '\v' ) ||
444+ (* offset == '\f' ) || (* offset == '\r' ) || (* offset == '\n' )) {
413445 offset ++ ;
414446 }
415- clipboard = clipboard_origin ;
416- #if ULAB_MAX_DIMS == 1
417- if (columns == cols [0 ]) {
418- mp_obj_t value = mp_parse_num_decimal (clipboard , len , false, false, NULL );
419- * array ++ = mp_obj_get_float (value );
420- }
421- #else
422- if (args [4 ].u_obj == mp_const_none ) {
423- mp_obj_t value = mp_parse_num_decimal (clipboard , len , false, false, NULL );
424- * array ++ = mp_obj_get_float (value );
425- } else {
426- for (uint8_t c = 0 ; c < used_columns ; c ++ ) {
427- if (columns == cols [c ]) {
428- mp_obj_t value = mp_parse_num_decimal (clipboard , len , false, false, NULL );
429- * array ++ = mp_obj_get_float (value );
430- break ;
447+ if (len > 0 ) {
448+ clipboard = clipboard_origin ;
449+ if (rows >= skiprows ) {
450+ #if ULAB_MAX_DIMS == 1
451+ if (columns == cols [0 ]) {
452+ io_assign_value (clipboard , len , ndarray , & idx , dtype );
431453 }
454+ #else
455+ if (args [4 ].u_obj == mp_const_none ) {
456+ io_assign_value (clipboard , len , ndarray , & idx , dtype );
457+ } else {
458+ for (uint8_t c = 0 ; c < used_columns ; c ++ ) {
459+ if (columns == cols [c ]) {
460+ io_assign_value (clipboard , len , ndarray , & idx , dtype );
461+ break ;
462+ }
463+ }
464+ }
465+ #endif
432466 }
433- }
434- #endif
435- columns ++ ;
436- len = 0 ;
467+ columns ++ ;
468+ len = 0 ;
437469
438- if (offset [-1 ] == '\n' ) {
439- columns = 0 ;
440- rows ++ ;
441- if (rows == max_rows ) {
442- break ;
470+ if (offset [-1 ] == '\n' ) {
471+ columns = 0 ;
472+ rows ++ ;
443473 }
444474 }
445475 } else {
@@ -721,7 +751,7 @@ static mp_obj_t io_savetxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
721751 const char * comments = mp_obj_str_get_data (args [5 ].u_obj , & _len );
722752 stream_p -> write (stream , comments , _len , & error );
723753 } else {
724- stream_p -> write (stream , "#" , 1 , & error );
754+ stream_p -> write (stream , "# " , 2 , & error );
725755 }
726756 const char * header = mp_obj_str_get_data (args [3 ].u_obj , & _len );
727757 stream_p -> write (stream , header , _len , & error );
@@ -769,7 +799,7 @@ static mp_obj_t io_savetxt(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw
769799 const char * comments = mp_obj_str_get_data (args [5 ].u_obj , & _len );
770800 stream_p -> write (stream , comments , _len , & error );
771801 } else {
772- stream_p -> write (stream , "#" , 1 , & error );
802+ stream_p -> write (stream , "# " , 2 , & error );
773803 }
774804 const char * footer = mp_obj_str_get_data (args [4 ].u_obj , & _len );
775805 stream_p -> write (stream , footer , _len , & error );
0 commit comments