From 4855ea9b986795e12f090cdd55f35b67d2f867ad Mon Sep 17 00:00:00 2001 From: Nathan Richard Date: Fri, 28 Jun 2024 18:13:08 +0200 Subject: [PATCH 1/4] feat: pydantic-csv --- .github/FUNDING.yml | 12 - .gitignore | 29 +- .pre-commit-config.yaml | 51 +++ .travis.yml | 9 - AUTHORS.md | 12 +- CONTRIBUTING.md | 8 +- HISTORY.md | 78 +---- LICENSE | 3 - MANIFEST.in | 11 - Pipfile | 14 - Pipfile.lock | 194 ----------- README.md | 399 +++++++++++------------ dataclass_csv/__init__.py | 71 ---- dataclass_csv/__init__.pyi | 7 - dataclass_csv/dataclass_reader.py | 258 --------------- dataclass_csv/dataclass_reader.pyi | 18 - dataclass_csv/dataclass_writer.py | 68 ---- dataclass_csv/dataclass_writer.pyi | 14 - dataclass_csv/decorators.py | 57 ---- dataclass_csv/decorators.pyi | 6 - dataclass_csv/exceptions.py | 12 - dataclass_csv/exceptions.pyi | 6 - dataclass_csv/field_mapper.py | 18 - dataclass_csv/field_mapper.pyi | 5 - dataclass_csv/header_mapper.py | 19 -- dataclass_csv/header_mapper.pyi | 5 - pydantic_csv/__init__.py | 63 ++++ pydantic_csv/basemodel_csv_reader.py | 178 ++++++++++ pydantic_csv/basemodel_csv_writer.py | 101 ++++++ pydantic_csv/exceptions.py | 19 ++ pydantic_csv/header_mapper.py | 32 ++ {dataclass_csv => pydantic_csv}/py.typed | 0 pyproject.toml | 18 + setup.cfg | 26 -- setup.py | 56 ---- tests/conftest.py | 116 ++++++- tests/mocks.py | 137 -------- tests/mocks/dates.csv | 3 + tests/mocks/default_factory.csv | 3 + tests/mocks/users.csv | 4 + tests/mocks/users_duplicate_header.csv | 4 + tests/mocks/users_empty_spaces.csv | 3 + tests/mocks/users_mapped.csv | 4 + tests/mocks/users_optional.csv | 3 + tests/mocks/users_space_in_header.csv | 4 + tests/mocks/users_wrong_type.csv | 3 + tests/models.py | 53 +++ tests/test_basemodel_csv_reader.py | 111 +++++++ tests/test_basemodel_csv_writer.py | 52 +++ tests/test_csv_data_validation.py | 89 ----- tests/test_dataclass_reader.py | 296 ----------------- tests/test_dataclass_writer.py | 72 ---- tests/test_decorators.py | 83 ----- 53 files changed, 1033 insertions(+), 1884 deletions(-) delete mode 100644 .github/FUNDING.yml create mode 100644 .pre-commit-config.yaml delete mode 100644 .travis.yml delete mode 100644 MANIFEST.in delete mode 100644 Pipfile delete mode 100644 Pipfile.lock delete mode 100644 dataclass_csv/__init__.py delete mode 100644 dataclass_csv/__init__.pyi delete mode 100644 dataclass_csv/dataclass_reader.py delete mode 100644 dataclass_csv/dataclass_reader.pyi delete mode 100644 dataclass_csv/dataclass_writer.py delete mode 100644 dataclass_csv/dataclass_writer.pyi delete mode 100644 dataclass_csv/decorators.py delete mode 100644 dataclass_csv/decorators.pyi delete mode 100644 dataclass_csv/exceptions.py delete mode 100644 dataclass_csv/exceptions.pyi delete mode 100644 dataclass_csv/field_mapper.py delete mode 100644 dataclass_csv/field_mapper.pyi delete mode 100644 dataclass_csv/header_mapper.py delete mode 100644 dataclass_csv/header_mapper.pyi create mode 100644 pydantic_csv/__init__.py create mode 100644 pydantic_csv/basemodel_csv_reader.py create mode 100644 pydantic_csv/basemodel_csv_writer.py create mode 100644 pydantic_csv/exceptions.py create mode 100644 pydantic_csv/header_mapper.py rename {dataclass_csv => pydantic_csv}/py.typed (100%) create mode 100644 pyproject.toml delete mode 100644 setup.cfg delete mode 100644 setup.py delete mode 100644 tests/mocks.py create mode 100644 tests/mocks/dates.csv create mode 100644 tests/mocks/default_factory.csv create mode 100644 tests/mocks/users.csv create mode 100644 
tests/mocks/users_duplicate_header.csv create mode 100644 tests/mocks/users_empty_spaces.csv create mode 100644 tests/mocks/users_mapped.csv create mode 100644 tests/mocks/users_optional.csv create mode 100644 tests/mocks/users_space_in_header.csv create mode 100644 tests/mocks/users_wrong_type.csv create mode 100644 tests/models.py create mode 100644 tests/test_basemodel_csv_reader.py create mode 100644 tests/test_basemodel_csv_writer.py delete mode 100644 tests/test_csv_data_validation.py delete mode 100644 tests/test_dataclass_reader.py delete mode 100644 tests/test_dataclass_writer.py delete mode 100644 tests/test_decorators.py diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml deleted file mode 100644 index 9691dc0..0000000 --- a/.github/FUNDING.yml +++ /dev/null @@ -1,12 +0,0 @@ -# These are supported funding model platforms - -github: [dfurtado] -patreon: # Replace with a single Patreon username -open_collective: # Replace with a single Open Collective username -ko_fi: # Replace with a single Ko-fi username -tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel -community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry -liberapay: # Replace with a single Liberapay username -issuehunt: # Replace with a single IssueHunt username -otechie: # Replace with a single Otechie username -custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] diff --git a/.gitignore b/.gitignore index bc8e7d8..833b9e2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,28 @@ -__pycache__ -*.pyc -.idea +# Environments +.env +.venv env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Cache +**/__pycache__ +.dccache + +# PyCharm +.idea/* + +# Visual Studio Code +.vscode/* +*.code-workspace + +# Local History for Visual Studio Code +.history/ + +# other +*.pyc *.swo *.swp *.*~ @@ -15,7 +36,5 @@ docs build dist .eggs -.vscode gmon.out .vim -pyproject.toml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..c263237 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,51 @@ +repos: + # update certain features to python 3.9 features + - repo: https://github.com/asottile/pyupgrade + rev: v3.16.0 + hooks: + - id: pyupgrade + args: + - --py39-plus + - --keep-runtime-typing + + # useful pre-commit hooks + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: check-ast + - id: check-yaml + - id: check-toml + - id: end-of-file-fixer + - id: trailing-whitespace + + # import statement sorter + - repo: https://github.com/PyCQA/isort + rev: 5.13.2 + hooks: + - id: isort + name: isort (python) + args: + - --profile=black + + # code formatter + - repo: https://github.com/psf/black + rev: 24.4.2 + hooks: + - id: black + args: + - --line-length=120 + - --target-version=py39 + + # code quality analysis + - repo: https://github.com/PyCQA/pylint + rev: v3.2.4 + hooks: + - id: pylint + name: pylint + args: + - --disable=E0401 + - --disable=too-many-arguments + - --disable=too-few-public-methods + - --max-line-length=120 + - --recursive=y + - --ignore-paths=venv*,tests* diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index bd1a1fb..0000000 --- a/.travis.yml +++ /dev/null @@ -1,9 +0,0 @@ -language: python -python: - - "3.7-dev" -# command to install dependencies -install: - - "pip install pipenv" - - "pipenv install" -# command to run tests -script: pytest diff --git a/AUTHORS.md 
b/AUTHORS.md index 96f9dc0..e1ed954 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -2,14 +2,12 @@ ## Development Lead -* Daniel Furtado +* Nathan Richard ## Contributors -* Nick Schober -* Zoltan Ivanfi -* Alec Benzer -* Clint Byrum -* @johnthangen +* Be the first to contribute to this repo -See complete list at: https://github.com/dfurtado/dataclass-csv/graphs/contributors +## Special Thanks +* Daniel Furtado ([github](https://github.com/dfurtado)) and his python package 'dataclass-csv' ([pypi](https://pypi.org/project/dataclass-csv/) | [github](https://github.com/dfurtado/dataclass-csv)). Most of the codebase and documentation comes from him, adjusted to use pydantic.BaseModel. +* Daniel Seifert ([github](https://github.com/dfseifert)) for his guidance in building a pyproject and for reviewing my code <3 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ef7b88c..ba9d5b4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -3,9 +3,6 @@ I love to work together with people so if you have an excellent idea for a feature, improvements or maybe you found a bug. Please, don't hesitate in adding an issue, so we can discuss and find a good solution together. -If you never participated in any open-source project before it is even better! I can help you through the process. The -important thing is to get more people involved in the Python community. So, don't be shy! ## Getting started The best way to get started is to look at the issues section and see if the bug or feature you are planning to work with @@ -52,9 +49,10 @@ If you get an issue to work on, then you can: ## Before you submit a pull request - Make sure to add unit tests (if applicable) -- Make sure all tests are passing -- Run a code formatter. This project uses black, you can run the command: `black -l79 -N -S ./dataclass_csv` +- Make sure all tests are passing (Note that the CSV files have to use \r\n as newline) +- Run a code formatter. This project uses black, you can run the command: `black -l 120 -t py39 ./pydantic_csv` - Add docstrings for new functions and classes. +- Make sure you follow the [PEP 8](https://pep8.org/) Style Guide (run the pre-commit) diff --git a/HISTORY.md b/HISTORY.md index 72efb7b..ead94c4 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,81 +1,5 @@ # History -### 0.1.0 (2018-11-25) +### 0.1.0 (2024-06-28) * First release on PyPI. - -### 0.1.1 (2018-11-25) - -* Documentation fixes. - -### 0.1.2 (2018-11-25) - -* Documentation fixes. - -### 0.1.3 (2018-11-26) - -* Bug fixes -* Removed the requirement of setting the dataclass init to `True` - -### 0.1.5 (2018-11-29) - -* Support for parsing datetime values. -* Better handling when default values are set to `None` - -### 0.1.6 (2018-12-01) - -* Added support for reader default values from the default property of the `dataclasses.field`. -* Added support for allowing string values with only white spaces in a class level using the `@accept_whitespaces` decorator or through the `dataclasses.field` metadata. -* Added support for specifying date format using the `dataclasses.field` metadata. - -### 0.1.7 (2018-12-01) - -* Added support for default values from `default_factory` in the field's metadata. This allows adding mutable default values to the dataclass properties. - -### 1.0.0 (2018-12-16) - -* When a data does not pass validation it shows the line number in the CSV file where the data contain errors. -* Improved error handling. -* Changed the usage of the `@accept_whitespaces` decorator.
-* Updated documentation. - -### 1.0.1 (2019-01-29) - -* Fixed issue when parsing headers on a CSV file with trailing white spaces. - -### 1.1.0 (2019-02-17) - -* Added support for boolean values. -* Docstrings - -### 1.1.1 (2019-02-17) - -* Documentation fixes. - -### 1.1.2 (2019-02-17) - -* Documentation fixes. - -### 1.1.3 (2020-03-01) - -* Handle properties with init set to False -* Handle Option type annotation - -### 1.2.0 (2021-03-02) - -* Introduction of a DataclassWriter -* Added type hinting to external API -* Documentation updates -* Bug fixes - -## 1.3.0 (2021-04-10) - -* Included stub files -* check if the CSV file has duplicated header values -* Fixed issues #22 and #33 -* code cleanup - -## 1.4.0 (2021-12-13) - -* Bug fixes -* Support for date types \ No newline at end of file diff --git a/LICENSE b/LICENSE index 0ccf6b4..34a5dde 100644 --- a/LICENSE +++ b/LICENSE @@ -1,5 +1,3 @@ - - BSD License Copyright (c) 2018, Daniel Furtado @@ -29,4 +27,3 @@ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 68e47f0..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,11 +0,0 @@ -include AUTHORS.md -include CONTRIBUTING.md -include HISTORY.md -include LICENSE -include README.md - -recursive-include tests * -recursive-exclude * __pycache__ -recursive-exclude * *.py[co] - -recursive-include docs *.md conf.py Makefile make.bat *.jpg *.png *.gif diff --git a/Pipfile b/Pipfile deleted file mode 100644 index a8e5493..0000000 --- a/Pipfile +++ /dev/null @@ -1,14 +0,0 @@ -[[source]] -url = "https://pypi.org/simple" -verify_ssl = true -name = "pypi" - -[dev-packages] -pytest = "*" -mypy = "*" -flake8 = "*" - -[packages] - -[requires] -python_version = "3.7" diff --git a/Pipfile.lock b/Pipfile.lock deleted file mode 100644 index 602f200..0000000 --- a/Pipfile.lock +++ /dev/null @@ -1,194 +0,0 @@ -{ - "_meta": { - "hash": { - "sha256": "ff9642fcfefdd196731283041b11231f54200352dd42071a89fc5dbe84ce128b" - }, - "pipfile-spec": 6, - "requires": { - "python_version": "3.7" - }, - "sources": [ - { - "name": "pypi", - "url": "https://pypi.org/simple", - "verify_ssl": true - } - ] - }, - "default": {}, - "develop": { - "attrs": { - "hashes": [ - "sha256:31b2eced602aa8423c2aea9c76a724617ed67cf9513173fd3a4f03e3a929c7e6", - "sha256:832aa3cde19744e49938b91fea06d69ecb9e649c93ba974535d08ad92164f700" - ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==20.3.0" - }, - "flake8": { - "hashes": [ - "sha256:12d05ab02614b6aee8df7c36b97d1a3b2372761222b19b58621355e82acddcff", - "sha256:78873e372b12b093da7b5e5ed302e8ad9e988b38b063b61ad937f26ca58fc5f0" - ], - "index": "pypi", - "version": "==3.9.0" - }, - "iniconfig": { - "hashes": [ - "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3", - "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32" - ], - "version": "==1.1.1" - }, - "mccabe": { - "hashes": [ - "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42", - "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f" - ], - "version": "==0.6.1" - }, - "mypy": { - "hashes": [ - "sha256:0d0a87c0e7e3a9becdfbe936c981d32e5ee0ccda3e0f07e1ef2c3d1a817cf73e", - 
"sha256:25adde9b862f8f9aac9d2d11971f226bd4c8fbaa89fb76bdadb267ef22d10064", - "sha256:28fb5479c494b1bab244620685e2eb3c3f988d71fd5d64cc753195e8ed53df7c", - "sha256:2f9b3407c58347a452fc0736861593e105139b905cca7d097e413453a1d650b4", - "sha256:33f159443db0829d16f0a8d83d94df3109bb6dd801975fe86bacb9bf71628e97", - "sha256:3f2aca7f68580dc2508289c729bd49ee929a436208d2b2b6aab15745a70a57df", - "sha256:499c798053cdebcaa916eef8cd733e5584b5909f789de856b482cd7d069bdad8", - "sha256:4eec37370483331d13514c3f55f446fc5248d6373e7029a29ecb7b7494851e7a", - "sha256:552a815579aa1e995f39fd05dde6cd378e191b063f031f2acfe73ce9fb7f9e56", - "sha256:5873888fff1c7cf5b71efbe80e0e73153fe9212fafdf8e44adfe4c20ec9f82d7", - "sha256:61a3d5b97955422964be6b3baf05ff2ce7f26f52c85dd88db11d5e03e146a3a6", - "sha256:674e822aa665b9fd75130c6c5f5ed9564a38c6cea6a6432ce47eafb68ee578c5", - "sha256:7ce3175801d0ae5fdfa79b4f0cfed08807af4d075b402b7e294e6aa72af9aa2a", - "sha256:9743c91088d396c1a5a3c9978354b61b0382b4e3c440ce83cf77994a43e8c521", - "sha256:9f94aac67a2045ec719ffe6111df543bac7874cee01f41928f6969756e030564", - "sha256:a26f8ec704e5a7423c8824d425086705e381b4f1dfdef6e3a1edab7ba174ec49", - "sha256:abf7e0c3cf117c44d9285cc6128856106183938c68fd4944763003decdcfeb66", - "sha256:b09669bcda124e83708f34a94606e01b614fa71931d356c1f1a5297ba11f110a", - "sha256:cd07039aa5df222037005b08fbbfd69b3ab0b0bd7a07d7906de75ae52c4e3119", - "sha256:d23e0ea196702d918b60c8288561e722bf437d82cb7ef2edcd98cfa38905d506", - "sha256:d65cc1df038ef55a99e617431f0553cd77763869eebdf9042403e16089fe746c", - "sha256:d7da2e1d5f558c37d6e8c1246f1aec1e7349e4913d8fb3cb289a35de573fe2eb" - ], - "index": "pypi", - "version": "==0.812" - }, - "mypy-extensions": { - "hashes": [ - "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d", - "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8" - ], - "version": "==0.4.3" - }, - "packaging": { - "hashes": [ - "sha256:5b327ac1320dc863dca72f4514ecc086f31186744b84a230374cc1fd776feae5", - "sha256:67714da7f7bc052e064859c05c595155bd1ee9f69f76557e21f051443c20947a" - ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==20.9" - }, - "pluggy": { - "hashes": [ - "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0", - "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d" - ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==0.13.1" - }, - "py": { - "hashes": [ - "sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3", - "sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a" - ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==1.10.0" - }, - "pycodestyle": { - "hashes": [ - "sha256:514f76d918fcc0b55c6680472f0a37970994e07bbb80725808c17089be302068", - "sha256:c389c1d06bf7904078ca03399a4816f974a1d590090fecea0c63ec26ebaf1cef" - ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==2.7.0" - }, - "pyflakes": { - "hashes": [ - "sha256:910208209dcea632721cb58363d0f72913d9e8cf64dc6f8ae2e02a3609aba40d", - "sha256:e59fd8e750e588358f1b8885e5a4751203a0516e0ee6d34811089ac294c8806f" - ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==2.3.0" - }, - "pyparsing": { - "hashes": [ - "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1", - 
"sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b" - ], - "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==2.4.7" - }, - "pytest": { - "hashes": [ - "sha256:9d1edf9e7d0b84d72ea3dbcdfd22b35fb543a5e8f2a60092dd578936bf63d7f9", - "sha256:b574b57423e818210672e07ca1fa90aaf194a4f63f3ab909a2c67ebb22913839" - ], - "index": "pypi", - "version": "==6.2.2" - }, - "toml": { - "hashes": [ - "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", - "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f" - ], - "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==0.10.2" - }, - "typed-ast": { - "hashes": [ - "sha256:07d49388d5bf7e863f7fa2f124b1b1d89d8aa0e2f7812faff0a5658c01c59aa1", - "sha256:14bf1522cdee369e8f5581238edac09150c765ec1cb33615855889cf33dcb92d", - "sha256:240296b27397e4e37874abb1df2a608a92df85cf3e2a04d0d4d61055c8305ba6", - "sha256:36d829b31ab67d6fcb30e185ec996e1f72b892255a745d3a82138c97d21ed1cd", - "sha256:37f48d46d733d57cc70fd5f30572d11ab8ed92da6e6b28e024e4a3edfb456e37", - "sha256:4c790331247081ea7c632a76d5b2a265e6d325ecd3179d06e9cf8d46d90dd151", - "sha256:5dcfc2e264bd8a1db8b11a892bd1647154ce03eeba94b461effe68790d8b8e07", - "sha256:7147e2a76c75f0f64c4319886e7639e490fee87c9d25cb1d4faef1d8cf83a440", - "sha256:7703620125e4fb79b64aa52427ec192822e9f45d37d4b6625ab37ef403e1df70", - "sha256:8368f83e93c7156ccd40e49a783a6a6850ca25b556c0fa0240ed0f659d2fe496", - "sha256:84aa6223d71012c68d577c83f4e7db50d11d6b1399a9c779046d75e24bed74ea", - "sha256:85f95aa97a35bdb2f2f7d10ec5bbdac0aeb9dafdaf88e17492da0504de2e6400", - "sha256:8db0e856712f79c45956da0c9a40ca4246abc3485ae0d7ecc86a20f5e4c09abc", - "sha256:9044ef2df88d7f33692ae3f18d3be63dec69c4fb1b5a4a9ac950f9b4ba571606", - "sha256:963c80b583b0661918718b095e02303d8078950b26cc00b5e5ea9ababe0de1fc", - "sha256:987f15737aba2ab5f3928c617ccf1ce412e2e321c77ab16ca5a293e7bbffd581", - "sha256:9ec45db0c766f196ae629e509f059ff05fc3148f9ffd28f3cfe75d4afb485412", - "sha256:9fc0b3cb5d1720e7141d103cf4819aea239f7d136acf9ee4a69b047b7986175a", - "sha256:a2c927c49f2029291fbabd673d51a2180038f8cd5a5b2f290f78c4516be48be2", - "sha256:a38878a223bdd37c9709d07cd357bb79f4c760b29210e14ad0fb395294583787", - "sha256:b4fcdcfa302538f70929eb7b392f536a237cbe2ed9cba88e3bf5027b39f5f77f", - "sha256:c0c74e5579af4b977c8b932f40a5464764b2f86681327410aa028a22d2f54937", - "sha256:c1c876fd795b36126f773db9cbb393f19808edd2637e00fd6caba0e25f2c7b64", - "sha256:c9aadc4924d4b5799112837b226160428524a9a45f830e0d0f184b19e4090487", - "sha256:cc7b98bf58167b7f2db91a4327da24fb93368838eb84a44c472283778fc2446b", - "sha256:cf54cfa843f297991b7388c281cb3855d911137223c6b6d2dd82a47ae5125a41", - "sha256:d003156bb6a59cda9050e983441b7fa2487f7800d76bdc065566b7d728b4581a", - "sha256:d175297e9533d8d37437abc14e8a83cbc68af93cc9c1c59c2c292ec59a0697a3", - "sha256:d746a437cdbca200622385305aedd9aef68e8a645e385cc483bdc5e488f07166", - "sha256:e683e409e5c45d5c9082dc1daf13f6374300806240719f95dc783d1fc942af10" - ], - "version": "==1.4.2" - }, - "typing-extensions": { - "hashes": [ - "sha256:7cb407020f00f7bfc3cb3e7881628838e69d8f3fcab2f64742a5e76b2f841918", - "sha256:99d4073b617d30288f569d3f13d2bd7548c3a7e4c8de87db09a9d29bb3a4a60c", - "sha256:dafc7639cde7f1b6e1acc0f457842a83e722ccca8eef5270af2d74792619a89f" - ], - "markers": "python_version < '3.8'", - "version": "==3.7.4.3" - } - } -} diff --git a/README.md b/README.md index aa06511..6668234 100644 --- a/README.md +++ b/README.md @@ 
-1,138 +1,174 @@ -[![Build Status](https://travis-ci.org/dfurtado/dataclass-csv.svg?branch=master)](https://travis-ci.org/dfurtado/dataclass-csv) -[![pypi](https://img.shields.io/pypi/v/dataclass-csv.svg)](https://pypi.python.org/pypi/dataclass-csv) -[![Downloads](https://pepy.tech/badge/dataclass-csv)](https://pepy.tech/project/dataclass-csv) +# pydantic - CSV +Pydantic CSV makes working with CSV files easier and much better than working with Dicts. It uses pydantic BaseModels to store data of every row on the CSV file and also uses type annotations which enables proper type checking and validation. +## Table of Contents -# Dataclass CSV - -Dataclass CSV makes working with CSV files easier and much better than working with Dicts. It uses Python's Dataclasses to store data of every row on the CSV file and also uses type annotations which enables proper type checking and validation. +___ +- [Main features](#main-features) +- [Installation](#installation) +- [Getting started](#getting-started) + * [Using the BasemodelCSVReader](#using-the-basemodelcsvreader) + + [Error handling](#error-handling) + + [Default values](#default-values) + + [Mapping BaseModel fields to columns](#mapping-basemodel-fields-to-columns) + + [Supported type annotation](#supported-type-annotation) + + [User-defined types](#user-defined-types) + * [Using the BasemodelCSVWriter](#using-the-basemodelcsvwriter) + + [Modifying the CSV header](#modifying-the-csv-header) +- [Copyright and License](#copyright-and-license) +- [Credits](#credits) +___ ## Main features -- Use `dataclasses` instead of dictionaries to represent the rows in the CSV file. -- Take advantage of the `dataclass` properties type annotation. `DataclassReader` use the type annotation to perform validation of the data of the CSV file. -- Automatic type conversion. `DataclassReader` supports `str`, `int`, `float`, `complex`, `datetime` and `bool`, as well as any type whose constructor accepts a string as its single argument. -- Helps you troubleshoot issues with the data in the CSV file. `DataclassReader` will show exactly in which line of the CSV file contain errors. -- Extract only the data you need. It will only parse the properties defined in the `dataclass` -- Familiar syntax. The `DataclassReader` is used almost the same way as the `DictReader` in the standard library. -- It uses `dataclass` features that let you define metadata properties so the data can be parsed exactly the way you want. -- Make the code cleaner. No more extra loops to convert data to the correct type, perform validation, set default values, the `DataclassReader` will do all this for you. -- In additon of the `DataclassReader` the library also provides a `DataclassWriter` which enables creating a CSV file -using a list of instances of a dataclass. +- Use `pydantic.BaseModel` instead of dictionaries to represent the rows in the CSV file. +- Take advantage of the `BaseModel` properties' type annotations. `BasemodelCSVReader` uses the type annotations to perform validation on the data of the CSV file. +- Automatic type conversion. `BasemodelCSVReader` supports `str`, `int`, `float`, `complex`, `datetime` and `bool`, as well as any type whose constructor accepts a string as its single argument. +- Helps you troubleshoot issues with the data in the CSV file. `BasemodelCSVReader` will show exactly which line of the CSV file contains errors. +- Extract only the data you need. It will only parse the properties defined in the `BaseModel`. +- Familiar syntax.
The `BasemodelCSVReader` is used almost the same way as the `DictReader` in the standard library. +- It uses `BaseModel` features that let you define Field properties or Config so the data can be parsed exactly the way you want. +- Make the code cleaner. No more extra loops to convert data to the correct type, perform validation, set default values; the `BasemodelCSVReader` will do all this for you. +- In addition to the `BasemodelCSVReader`, the library also provides a `BasemodelCSVWriter` which enables creating a CSV file using a list of instances of a BaseModel. +- Because [sqlmodel](https://github.com/tiangolo/sqlmodel) uses pydantic.BaseModels too, you can directly fill a database with data from a CSV file. ## Installation ```shell -pipenv install dataclass-csv +pip install pydantic-csv ``` ## Getting started -## Using the DataclassReader +### Using the BasemodelCSVReader First, add the necessary imports: ```python -from dataclasses import dataclass +from pydantic import BaseModel -from dataclass_csv import DataclassReader +from pydantic_csv import BasemodelCSVReader ``` Assuming that we have a CSV file with the contents below: ```text firstname,email,age -Elsa,elsa@test.com, 11 -Astor,astor@test.com, 7 -Edit,edit@test.com, 3 -Ella,ella@test.com, 2 +Elsa,elsa@test.com,26 +Astor,astor@test.com,44 +Edit,edit@test.com,33 +Ella,ella@test.com,22 ``` -Let's create a dataclass that will represent a row in the CSV file above: +Let's create a BaseModel that will represent a row in the CSV file above: ```python -@dataclass -class User: +class User(BaseModel): firstname: str email: str age: int ``` -The dataclass `User` has 3 properties, `firstname` and `email` is of type `str` and `age` is of type `int`. +The BaseModel `User` has 3 properties: `firstname` and `email` are of type `str` and `age` is of type `int`. -To load and read the contents of the CSV file we do the same thing as if we would be using the `DictReader` from the `csv` module in the Python's standard library. After opening the file we create an instance of the `DataclassReader` passing two arguments. The first is the `file` and the second is the dataclass that we wish to use to represent the data of every row of the CSV file. Like so: +To load and read the contents of the CSV file we do the same thing as if we would be using the `DictReader` from the `csv` module in Python's standard library. After opening the file we create an instance of the `BasemodelCSVReader` passing two arguments. The first is the `file` and the second is the BaseModel that we wish to use to represent the data of every row of the CSV file. Like so: ```python -with open(filename) as users_csv: - reader = DataclassReader(users_csv, User) - for row in reader: - print(row) +# using file on disk +with open("users.csv") as csv: + reader = BasemodelCSVReader(csv, User) + for row in reader: + print(row) + + +# using buffer (has to be a string buffer -> convert beforehand) +buffer = io.StringIO() # assumes `import io` and a buffer already filled with the CSV content +buffer.seek(0) # ensure that we read from the beginning + +reader = BasemodelCSVReader(buffer, User) +for row in reader: + print(row) ``` -The `DataclassReader` internally uses the `DictReader` from the `csv` module to read the CSV file which means that you can pass the same arguments that you would pass to the `DictReader`.
The complete argument list is shown below: ```python -dataclass_csv.DataclassReader( - f, - cls, - fieldnames=None, - restkey=None, - restval=None, - dialect='excel', - *args, - **kwds -) +User(firstname='Elsa', email='elsa@test.com', age=26) +User(firstname='Astor', email='astor@test.com', age=44) +User(firstname='Edit', email='edit@test.com', age=33) +User(firstname='Ella', email='ella@test.com', age=22) ``` -All keyword arguments support by `DictReader` are supported by the `DataclassReader`, with the addition of: +The `BasemodelCSVReader` internally uses the `DictReader` from the `csv` module to read the CSV file which means that you can pass the same arguments that you would pass to the `DictReader`. The complete argument list is shown below: -`validate_header` - The `DataclassReader` will raise a `ValueError` if the CSV file cointain columns with the same name. This -validation is performed to avoid data being overwritten. To skip this validation set `validate_header=False` when creating a -instance of the `DataclassReader`, see an example below: +```python +BasemodelCSVReader( + file_obj: Any, + model: Type[BaseModel], + *, # Note that you can't provide any value without specifying the parameter name + use_alias: bool = True, + validate_header: bool = True, + fieldnames: Optional[Sequence[str]] = None, + restkey: Optional[str] = None, + restval: Optional[Any] = None, + dialect: str = "excel", + **kwargs: Any, +) +``` +All keyword arguments supported by `DictReader` are supported by the `BasemodelCSVReader`, with the addition of `use_alias` and `validate_header`. Those are used to change the behaviour of the `BasemodelCSVReader` as follows: + +`use_alias` - The `BasemodelCSVReader` will search for column names identical to the aliases of the BaseModel Fields (if set, otherwise their names). +To avoid this behaviour and use the field names in every case, set `use_alias=False` when creating an instance of the `BasemodelCSVReader`, see an example below: ```python -reader = DataclassReader(f, User, validate_header=False) +reader = BasemodelCSVReader(csv, User, use_alias=False) ``` -If you run this code you should see an output like this: +`validate_header` - The `BasemodelCSVReader` will raise a `ValueError` if the CSV file contains columns with the same name. This +validation is performed to avoid data being overwritten. To skip this validation, set `validate_header=False` when creating an +instance of the `BasemodelCSVReader`, see an example below: ```python -User(firstname='Elsa', email='elsa@test.com', age=11) -User(firstname='Astor', email='astor@test.com', age=7) -User(firstname='Edit', email='edit@test.com', age=3) -User(firstname='Ella', email='ella@test.com', age=2) +reader = BasemodelCSVReader(csv, User, validate_header=False) ``` +**Important:** If two or more columns with the same name exist, the BaseModel is instantiated with the data from the right-most column. -### Error handling +#### Error handling -One of the advantages of using the `DataclassReader` is that it makes it easy to detect when the type of data in the CSV file is not what your application's model is expecting. And, the `DataclassReader` shows errors that will help to identify the rows with problem in your CSV file. +One of the advantages of using the `BasemodelCSVReader` is that it makes it easy to detect when the type of data in the CSV file is not what your application's model is expecting.
And, the `BasemodelCSVReader` shows errors that will help to identify the rows with problems in your CSV file. For example, say we change the contents of the CSV file shown in the **Getting started** section and, modify the `age` of the user Astor, let's change it to a string value: ```text -Astor, astor@test.com, test +firstname,email,age +Elsa,elsa@test.com,26 +Astor,astor@test.com,test +Edit,edit@test.com,33 +Ella,ella@test.com,22 ``` -Remember that in the dataclass `User` the `age` property is annotated with `int`. If we run the code again an exception will be raised with the message below: +Remember that in the BaseModel `User` the `age` property is annotated with `int`. If we run the code again an exception from the pydantic validation will be raised with the message below: ```text -dataclass_csv.exceptions.CsvValueError: The field `age` is defined as but -received a value of type . [CSV Line number: 3] +pydantic_csv.exceptions.CSVValueError: [Error on CSV Line number: 3] +1 validation error for User +age + Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='test', input_type=str] + For further information visit https://errors.pydantic.dev/2.7/v/int_parsing ``` -Note that apart from telling what the error was, the `DataclassReader` will also show which line of the CSV file contain the data with errors. +Note that apart from telling what the error was, the `BasemodelCSVReader` will also show which line of the CSV file contains the data with errors. -### Default values +#### Default values -The `DataclassReader` also handles properties with default values. Let's modify the dataclass `User` and add a default value for the field `email`: +The `BasemodelCSVReader` also handles properties with default values. Let's modify the BaseModel `User` and add a default value for the field `email`: ```python -from dataclasses import dataclass +from pydantic import BaseModel -@dataclass -class User: +class User(BaseModel): firstname: str email: str = 'Not specified' age: int @@ -140,8 +176,12 @@ class User: And we modify the CSV file and remove the email for the user Astor: -```python -Astor,, 7 +```text +firstname,email,age +Elsa,elsa@test.com,26 +Astor,,44 +Edit,edit@test.com,33 +Ella,ella@test.com,22 ``` If we run the code we should see the output below: @@ -153,151 +193,79 @@ User(firstname='Edit', email='edit@test.com', age=3) User(firstname='Ella', email='ella@test.com', age=2) -Note that now the object for the user Astor have the default value `Not specified` assigned to the email property. +Note that now the object for the user Astor has the default value `Not specified` assigned to the email property. -Default values can also be set using `dataclasses.field` like so: +Default values can also be set using `pydantic.Field` like so: ```python -from dataclasses import dataclass, field +from pydantic import BaseModel, Field -@dataclass -class User: +class User(BaseModel): firstname: str - email: str = field(default='Not specified') + email: str = Field(default='Not specified') age: int ``` -### Mapping dataclass fields to columns +#### Mapping BaseModel fields to columns -The mapping between a dataclass property and a column in the CSV file will be done automatically if the names match, however, there are situations that the name of the header for a column is different. We can easily tell the `DataclassReader` how the mapping should be done using the method `map`.
Assuming that we have a CSV file with the contents below: +The mapping between a BaseModel field and a column in the CSV file will be done automatically if the names match. However, there are situations where the name of the header for a column is different. We can easily tell the `BasemodelCSVReader` how the mapping should be done using the method `map`.\ +Assuming that we have a CSV file with the contents below: ```text First Name,email,age -Elsa,elsa@test.com, 11 +Elsa,elsa@test.com,26 +Astor,astor@test.com,44 +Edit,edit@test.com,33 +Ella,ella@test.com,22 ``` -Note that now, the column is called **First Name** and not **firstname** +Note that now the column is called **First Name** and not **firstname**. And we can use the method `map`, like so: ```python -reader = DataclassReader(users_csv, User) -reader.map('First name').to('firstname') +reader = BasemodelCSVReader(csv, User) +reader.map('First Name').to('firstname') ``` -Now the DataclassReader will know how to extract the data from the column **First Name** and add it to the to dataclass property **firstname** - -### Supported type annotation +Now the `BasemodelCSVReader` will know how to extract the data from the column **First Name** and add it to the BaseModel property **firstname**. -At the moment the `DataclassReader` support `int`, `str`, `float`, `complex`, `datetime`, and `bool`. When defining a `datetime` property, it is necessary to use the `dateformat` decorator, for example: +#### Supported type annotation ```python -from dataclasses import dataclass -from datetime import datetime - -from dataclass_csv import DataclassReader, dateformat - - -@dataclass -@dateformat('%Y/%m/%d') -class User: - name: str - email: str - birthday: datetime - - -if __name__ == '__main__': - - with open('users.csv') as f: - reader = DataclassReader(f, User) - for row in reader: - print(row) -``` - -Assuming that the CSV file have the following contents: +At the moment the `BasemodelCSVReader` supports `int`, `str`, `float`, `complex`, `datetime`, and `bool`. pydantic_csv doesn't parse dates and datetimes itself; it relies on pydantic's datetime parsing, which handles the common formats and Unix timestamps. If you have a more exotic format, you can use a pydantic validator. +Assuming that the CSV file has the following contents: ```text name,email,birthday -Edit,edit@test.com,2018/11/23 +Edit,edit@test.com,"Sunday, 6. January 2002" ``` -The output would look like this: - -```text -User(name='Edit', email='edit@test.com', birthday=datetime.datetime(2018, 11, 23, 0, 0)) -``` - -### Fields metadata - -It is important to note that the `dateformat` decorator will define the date format that will be used to parse date to all properties -in the class. Now there are situations where the data in a CSV file contains two or more columns with date values in different formats. It is possible -to set a format specific for every property using the `dataclasses.field`. Let's say that we now have a CSV file with the following contents: -```text -name,email,birthday, create_date -Edit,edit@test.com,2018/11/23,2018/11/23 10:43 -``` -As you can see the `create_date` contains time information as well.
- -The `dataclass` User can be defined like this: - +This would look like this: ```python -from dataclasses import dataclass, field +from pydantic import BaseModel, field_validator from datetime import datetime -from dataclass_csv import DataclassReader, dateformat - - -@dataclass -@dateformat('%Y/%m/%d') -class User: - name: str - email: str - birthday: datetime - create_date: datetime = field(metadata={'dateformat': '%Y/%m/%d %H:%M'}) -``` - -Note that the format for the `birthday` field was not speficied using the `field` metadata. In this case the format specified in the `dateformat` -decorator will be used. - -### Handling values with empty spaces - -When defining a property of type `str` in the `dataclass`, the `DataclassReader` will treat values with only white spaces as invalid. To change this -behavior, there is a decorator called `@accept_whitespaces`. When decorating the class with the `@accept_whitespaces` all the properties in the class -will accept values with only white spaces. -For example: - -```python -from dataclass_csv import DataclassReader, accept_whitespaces - -@accept_whitespaces -@dataclass -class User: +class User(BaseModel): name: str email: str birthday: datetime - created_at: datetime -``` - -If you need a specific field to accept white spaces, you can set the property `accept_whitespaces` in the field's metadata, like so: -```python -@dataclass -class User: - name: str - email: str = field(metadata={'accept_whitespaces': True}) - birthday: datetime - created_at: datetime + @field_validator("birthday", mode="before") + @classmethod + def parse_birthday_date(cls, value): + return datetime.strptime(value, "%A, %d. %B %Y") ``` -### User-defined types +#### User-defined types You can use any type for a field as long as its constructor accepts a string: ```python +import re +from pydantic import BaseModel + + class SSN: def __init__(self, val): if re.match(r"\d{9}", val): @@ -308,25 +276,23 @@ class SSN: -@dataclasses.dataclass -class User: +class User(BaseModel): name: str ssn: SSN -## Using the DataclassWriter +### Using the BasemodelCSVWriter -Reading a CSV file using the `DataclassReader` is great and gives us the type-safety of Python's dataclasses and type annotation, however, there are situations where we would like to use dataclasses for creating CSV files, that's where the `DataclassWriter` comes in handy. +Reading a CSV file using the `BasemodelCSVReader` is great and gives us the type-safety of Pydantic's BaseModels and type annotations. However, there are situations where we would like to use BaseModels for creating CSV files; that's where the `BasemodelCSVWriter` comes in handy. -Using the `DataclassWriter` is quite simple. +Using the `BasemodelCSVWriter` is quite simple.
Given that we have a BaseModel `User`: ```python -from dataclasses import dataclass +from pydantic import BaseModel -@dataclass -class User: +class User(BaseModel): firstname: str lastname: str age: int @@ -335,83 +301,98 @@ class User: And in your program we have a list of users: ```python - users = [ User(firstname="John", lastname="Smith", age=40), - User(firstname="Daniel", lastname="Nilsson", age=10), - User(firstname="Ella", "Fralla", age=4) + User(firstname="Daniel", lastname="Nilsson", age=23), + User(firstname="Ella", lastname="Fralla", age=28) ] ``` -In order to create a CSV using the `DataclassWriter` import it from `dataclass_csv`: +In order to create a CSV using the `BasemodelCSVWriter` import it from `pydantic_csv`: ```python -from dataclass_csv import DataclassWriter +from pydantic_csv import BasemodelCSVWriter ``` Initialize it with the required arguments and call the method `write`: ```python -with open("users.csv", "w") as f: - w = DataclassWriter(f, users, User) - w.write() +# using file on disk +with open("users.csv", "w") as csv: + writer = BasemodelCSVWriter(csv, users, User) + writer.write() + + +# using buffer (has to be a StringBuffer) +writer = BasemodelCSVWriter(buffer, users, User) +writer.write() + +buffer.seek(0) # ensure that the next working steps start at the beginning of the "file" + +# if you need a BytesBuffer just convert it: +bytes_buffer: io.BytesIO = io.BytesIO(buffer.read().encode("utf-8")) +bytes_buffer.name = buffer.name # assumes the StringIO was given a `name` attribute beforehand +bytes_buffer.seek(0) # ensure that the next working steps start at the beginning of the "file" ``` That's it! Let's break down the snippet above. -First, we open a file called `user.csv` for writing. After that, an instance of the `DataclassWriter` is created. To create a `DataclassWriter` we need to pass the `file`, the list of `User` instances, and lastly, the type, which in this case is `User`. +First, we open a file called `users.csv` for writing. After that, an instance of the `BasemodelCSVWriter` is created. To create a `BasemodelCSVWriter` we need to pass the `file_obj`, the list of `User` instances, and lastly, the type, which in this case is `User`. -The type is required since the writer uses it when trying to figure out the CSV header. By default, it will use the names of the -properties defined in the dataclass, in the case of the dataclass `User` the title of each column -will be `firstname`, `lastname` and `age`. +The type is required since the writer uses it when trying to figure out the CSV header. By default, it will use the field's alias or, if none is set, its name +as defined in the BaseModel. In the case of the BaseModel `User`, the title of each column will be `firstname`, `lastname` and `age`. See below the CSV created out of a list of `User`: ```text firstname,lastname,age John,Smith,40 -Daniel,Nilsson,10 -Ella,Fralla,4 +Daniel,Nilsson,23 +Ella,Fralla,28 ``` -The `DataclassWriter` also takes a `**fmtparams` which accepts the same parameters as the `csv.writer`, for more +The `BasemodelCSVWriter` also takes `**fmtparams` which accepts the same parameters as the `csv.writer`. For more information see: https://docs.python.org/3/library/csv.html#csv-fmt-params Now, there are situations where we don't want to write the CSV header. In this case, the method `write` of -the `DataclassWriter` accepts an extra argument, called `skip_header`. The default value is `False` and when set to +the `BasemodelCSVWriter` accepts an extra argument, called `skip_header`. The default value is `False` and when set to `True` it will skip the header.
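+For example, here is a minimal sketch combining both options (assuming the same `users` list as above; `delimiter` is one of the standard `csv.writer` fmtparams): +```python +with open("users.csv", "w") as file: + writer = BasemodelCSVWriter(file, users, User, delimiter=";") + writer.write(skip_header=True) # only the data rows are written, separated by ";" +```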
#### Modifying the CSV header -As previously mentioned the `DataclassWriter` uses the names of the properties defined in the dataclass as the CSV header titles, however, -depending on your use case it makes sense to change it. The `DataclassWriter` has a `map` method just for this purpose. +As previously mentioned the `BasemodelCSVWriter` uses the aliases or names of the fields defined in the BaseModel as the CSV header titles. +If you want the `BasemodelCSVWriter` to use only the field names and not the aliases, set `use_alias` to `False`, like this: +```python +writer = BasemodelCSVWriter(file_obj, users, User, use_alias=False) +``` - Using the `User` dataclass with the properties `firstname`, `lastname` and `age`. The snippet below shows how to change `firstname` to `First name` and `lastname` to `Last name`: +However, depending on your use case it makes sense to set custom headers and not use the aliases or names at all. The `BasemodelCSVWriter` has a `map` method just for this purpose. + Using the `User` BaseModel with the properties `firstname`, `lastname` and `age`. The snippet below shows how to change `firstname` to `First Name` and `lastname` to `Last Name`: ```python - with open("users.csv", "w") as f: - w = DataclassWriter(f, users, User) + with open("users.csv", "w") as file: + writer = BasemodelCSVWriter(file, users, User) # Add mappings for firstname and lastname - w.map("firstname").to("First name") - w.map("lastname").to("Last name") + writer.map("firstname").to("First Name") + writer.map("lastname").to("Last Name") - w.write() + writer.write() ``` The CSV output of the snippet above will be: ```text -First name,Last name,age +First Name,Last Name,age John,Smith,40 -Daniel,Nilsson,10 -Ella,Fralla,4 +Daniel,Nilsson,23 +Ella,Fralla,28 ``` ## Copyright and License -Copyright (c) 2018 Daniel Furtado. Code released under BSD 3-clause license +Copyright (c) 2024 Nathan Richard. Code released under BSD 3-clause license ## Credits - -This package was created with [Cookiecutter](https://github.com/audreyr/cookiecutter) and the [audreyr/cookiecutter-pypackage](https://github.com/audreyr/cookiecutter-pypackage) project template. +A huge shoutout to Daniel Furtado ([github](https://github.com/dfurtado)) and his python package 'dataclass-csv' ([pypi](https://pypi.org/project/dataclass-csv/) | [github](https://github.com/dfurtado/dataclass-csv)). Most of the codebase and documentation comes from him, adjusted to use pydantic.BaseModel. diff --git a/dataclass_csv/__init__.py b/dataclass_csv/__init__.py deleted file mode 100644 index 7366f6f..0000000 --- a/dataclass_csv/__init__.py +++ /dev/null @@ -1,71 +0,0 @@ -""" -dataclass_csv -~~~~~~~~~~~~~ - -The dataclass_csv is a library that parses every row of a CSV file into -`dataclasses`. It takes advantage of `dataclasses` features to perform -data validation and type conversion.
- -Basic Usage -~~~~~~~~~~~~~ - -Read data from a CSV file: - - >>> from dataclasses import dataclass - >>> from dataclass_csv import DataclassReader - - - >>> @dataclass - >>> class User: - >>> firstname: str - >>> lastname: str - >>> age: int - - >>> with open('users.csv') as f: - >>> reader = DataclassReader(f, User) - >>> users = list(reader) - >>> print(users) - [ - User(firstname='User1', lastname='Test', age=23), - User(firstname='User2', lastname='Test', age=34) - ] - -Write dataclasses to a CSV file: - - >>> from dataclasses import dataclass - >>> from dataclass_csv import DataclassWriter - - >>> @dataclass - >>> class User: - >>> firstname: str - >>> lastname: str - >>> age: int - - >>> users = [ - >>> User(firstname='User1', lastname='Test', age=23), - >>> User(firstname='User2', lastname='Test', age=34) - >>> ] - - >>> with open('users.csv', 'w') as f: - >>> writer = DataclassWriter(f, users, User) - >>> writer.write() - - -:copyright: (c) 2018 by Daniel Furtado. -:license: BSD, see LICENSE for more details. -""" - - -from .dataclass_reader import DataclassReader -from .dataclass_writer import DataclassWriter -from .decorators import dateformat, accept_whitespaces -from .exceptions import CsvValueError - - -__all__ = [ - "DataclassReader", - "DataclassWriter", - "dateformat", - "accept_whitespaces", - "CsvValueError", -] diff --git a/dataclass_csv/__init__.pyi b/dataclass_csv/__init__.pyi deleted file mode 100644 index 973174a..0000000 --- a/dataclass_csv/__init__.pyi +++ /dev/null @@ -1,7 +0,0 @@ -from .dataclass_reader import DataclassReader as DataclassReader -from .dataclass_writer import DataclassWriter as DataclassWriter -from .decorators import ( - accept_whitespaces as accept_whitespaces, - dateformat as dateformat, -) -from .exceptions import CsvValueError as CsvValueError diff --git a/dataclass_csv/dataclass_reader.py b/dataclass_csv/dataclass_reader.py deleted file mode 100644 index 6467c21..0000000 --- a/dataclass_csv/dataclass_reader.py +++ /dev/null @@ -1,258 +0,0 @@ -import dataclasses -import csv - -from datetime import date, datetime -from distutils.util import strtobool -from typing import Union, Type, Optional, Sequence, Dict, Any, List - -import typing - -from .field_mapper import FieldMapper -from .exceptions import CsvValueError - -from collections import Counter - - -def _verify_duplicate_header_items(header): - if header is not None and len(header) == 0: - return - - header_counter = Counter(header) - duplicated = [k for k, v in header_counter.items() if v > 1] - - if len(duplicated) > 0: - raise ValueError( - ( - "It seems like the CSV file contain duplicated header " - f"values: {duplicated}. This may cause inconsistent data. " - "Use the kwarg validate_header=False when initializing the " - "DataclassReader to skip the header validation." 
- ) - ) - - -def is_union_type(t): - if hasattr(t, "__origin__") and t.__origin__ is Union: - return True - - return False - - -def get_args(t): - if hasattr(t, "__args__"): - return t.__args__ - - return tuple() - - -class DataclassReader: - def __init__( - self, - f: Any, - cls: Type[object], - fieldnames: Optional[Sequence[str]] = None, - restkey: Optional[str] = None, - restval: Optional[Any] = None, - dialect: str = "excel", - *args: Any, - **kwds: Any, - ): - - if not f: - raise ValueError("The f argument is required.") - - if cls is None or not dataclasses.is_dataclass(cls): - raise ValueError("cls argument needs to be a dataclass.") - - self._cls = cls - self._optional_fields = self._get_optional_fields() - self._field_mapping: Dict[str, Dict[str, Any]] = {} - - validate_header = kwds.pop("validate_header", True) - - self._reader = csv.DictReader( - f, fieldnames, restkey, restval, dialect, *args, **kwds - ) - - if validate_header: - _verify_duplicate_header_items(self._reader.fieldnames) - - self.type_hints = typing.get_type_hints(cls) - - def _get_optional_fields(self): - return [ - field.name - for field in dataclasses.fields(self._cls) - if not isinstance(field.default, dataclasses._MISSING_TYPE) - or not isinstance(field.default_factory, dataclasses._MISSING_TYPE) - ] - - def _add_to_mapping(self, property_name, csv_fieldname): - self._field_mapping[property_name] = csv_fieldname - - def _get_metadata_option(self, field, key): - option = field.metadata.get(key, getattr(self._cls, f"__{key}__", None)) - return option - - def _get_default_value(self, field): - return ( - field.default - if not isinstance(field.default, dataclasses._MISSING_TYPE) - else field.default_factory() - ) - - def _get_possible_keys(self, fieldname, row): - possible_keys = list(filter(lambda x: x.strip() == fieldname, row.keys())) - if possible_keys: - return possible_keys[0] - - def _get_value(self, row, field): - is_field_mapped = False - - try: - if field.name in self._field_mapping.keys(): - is_field_mapped = True - key = self._field_mapping.get(field.name) - else: - key = field.name - - if key in row.keys(): - value = row[key] - else: - possible_key = self._get_possible_keys(field.name, row) - key = possible_key if possible_key else key - value = row[key] - - except KeyError: - if field.name in self._optional_fields: - return self._get_default_value(field) - else: - keyerror_message = f"The value for the column `{field.name}`" - if is_field_mapped: - keyerror_message = f"The value for the mapped column `{key}`" - raise KeyError(f"{keyerror_message} is missing in the CSV file") - else: - if not value and field.name in self._optional_fields: - return self._get_default_value(field) - elif not value and field.name not in self._optional_fields: - raise ValueError(f"The field `{field.name}` is required.") - elif ( - value - and field.type is str - and not len(value.strip()) - and not self._get_metadata_option(field, "accept_whitespaces") - ): - raise ValueError( - ( - f"It seems like the value of `{field.name}` contains " - "only white spaces. To allow white spaces to all " - "string fields, use the @accept_whitespaces " - "decorator. " - "To allow white spaces specifically for the field " - f"`{field.name}` change its definition to: " - f"`{field.name}: str = field(metadata=" - "{'accept_whitespaces': True})`." 
- ) - ) - else: - return value - - def _parse_date_value(self, field, date_value, field_type): - dateformat = self._get_metadata_option(field, "dateformat") - - if not isinstance(date_value, str): - return date_value - - if not dateformat: - raise AttributeError( - ( - "Unable to parse the datetime string value. Date format " - "not specified. To specify a date format for all " - "datetime fields in the class, use the @dateformat " - "decorator. To define a date format specifically for this " - "field, change its definition to: " - f"`{field.name}: datetime = field(metadata=" - "{'dateformat': })`." - ) - ) - - datetime_obj = datetime.strptime(date_value, dateformat) - - if field_type == date: - return datetime_obj.date() - else: - return datetime_obj - - def _process_row(self, row): - values = dict() - - for field in dataclasses.fields(self._cls): - if not field.init: - continue - - try: - value = self._get_value(row, field) - except ValueError as ex: - raise CsvValueError(ex, line_number=self._reader.line_num) from None - - if not value and field.default is None: - values[field.name] = None - continue - - field_type = self.type_hints[field.name] - - if is_union_type(field_type): - type_args = [x for x in get_args(field_type) if x is not type(None)] - if len(type_args) == 1: - field_type = type_args[0] - - if field_type is datetime or field_type is date: - try: - transformed_value = self._parse_date_value(field, value, field_type) - except ValueError as ex: - raise CsvValueError(ex, line_number=self._reader.line_num) from None - else: - values[field.name] = transformed_value - continue - - if field_type is bool: - try: - transformed_value = ( - value - if isinstance(value, bool) - else strtobool(str(value).strip()) == 1 - ) - except ValueError as ex: - raise CsvValueError(ex, line_number=self._reader.line_num) from None - else: - values[field.name] = transformed_value - continue - - try: - transformed_value = field_type(value) - except ValueError as e: - raise CsvValueError( - ( - f"The field `{field.name}` is defined as {field.type} " - f"but received a value of type {type(value)}." - ), - line_number=self._reader.line_num, - ) from e - else: - values[field.name] = transformed_value - return self._cls(**values) - - def __next__(self): - row = next(self._reader) - return self._process_row(row) - - def __iter__(self): - return self - - def map(self, csv_fieldname: str) -> FieldMapper: - """Used to map a field in the CSV file to a `dataclass` field - :param csv_fieldname: The name of the CSV field - """ - return FieldMapper( - lambda property_name: self._add_to_mapping(property_name, csv_fieldname) - ) diff --git a/dataclass_csv/dataclass_reader.pyi b/dataclass_csv/dataclass_reader.pyi deleted file mode 100644 index 155bd11..0000000 --- a/dataclass_csv/dataclass_reader.pyi +++ /dev/null @@ -1,18 +0,0 @@ -from .field_mapper import FieldMapper as FieldMapper -from typing import Any, Optional, Sequence, Type - -class DataclassReader: - def __init__( - self, - f: Any, - cls: Type[object], - fieldnames: Optional[Sequence[str]] = ..., - restkey: Optional[str] = ..., - restval: Optional[Any] = ..., - dialect: str = ..., - *args: Any, - **kwds: Any - ) -> None: ... - def __next__(self) -> None: ... - def __iter__(self) -> Any: ... - def map(self, csv_fieldname: str) -> FieldMapper: ... 
diff --git a/dataclass_csv/dataclass_writer.py b/dataclass_csv/dataclass_writer.py deleted file mode 100644 index fdcbb52..0000000 --- a/dataclass_csv/dataclass_writer.py +++ /dev/null @@ -1,68 +0,0 @@ -import csv -import dataclasses -from typing import Type, Dict, Any, List -from .header_mapper import HeaderMapper - - -class DataclassWriter: - def __init__( - self, - f: Any, - data: List[Any], - cls: Type[object], - dialect: str = "excel", - **fmtparams: Dict[str, Any], - ): - if not f: - raise ValueError("The f argument is required") - - if not isinstance(data, list): - raise ValueError("Invalid 'data' argument. It must be a list") - - if not dataclasses.is_dataclass(cls): - raise ValueError("Invalid 'cls' argument. It must be a dataclass") - - self._data = data - self._cls = cls - self._field_mapping: Dict[str, str] = dict() - - self._fieldnames = [x.name for x in dataclasses.fields(cls)] - - self._writer = csv.writer(f, dialect=dialect, **fmtparams) - - def _add_to_mapping(self, header: str, propname: str): - self._field_mapping[propname] = header - - def _apply_mapping(self): - mapped_fields = [] - - for field in self._fieldnames: - mapped_item = self._field_mapping.get(field, field) - mapped_fields.append(mapped_item) - - return mapped_fields - - def write(self, skip_header: bool = False): - if not skip_header: - if self._field_mapping: - self._fieldnames = self._apply_mapping() - - self._writer.writerow(self._fieldnames) - - for item in self._data: - if not isinstance(item, self._cls): - raise TypeError( - ( - f"The item [{item}] is not an instance of " - f"{self._cls.__name__}. All items on the list must be " - "instances of the same type" - ) - ) - row = dataclasses.astuple(item) - self._writer.writerow(row) - - def map(self, propname: str) -> HeaderMapper: - """Used to map a field in the dataclass to header item in the CSV file - :param propname: The name of the property of the dataclass to be mapped - """ - return HeaderMapper(lambda header: self._add_to_mapping(header, propname)) diff --git a/dataclass_csv/dataclass_writer.pyi b/dataclass_csv/dataclass_writer.pyi deleted file mode 100644 index 9c0d76f..0000000 --- a/dataclass_csv/dataclass_writer.pyi +++ /dev/null @@ -1,14 +0,0 @@ -from .header_mapper import HeaderMapper as HeaderMapper -from typing import Any, Dict, List, Type - -class DataclassWriter: - def __init__( - self, - f: Any, - data: List[Any], - cls: Type[object], - dialect: str = ..., - **fmtparams: Dict[str, Any], - ) -> None: ... - def write(self, skip_header: bool = ...) -> Any: ... - def map(self, propname: str) -> HeaderMapper: ... diff --git a/dataclass_csv/decorators.py b/dataclass_csv/decorators.py deleted file mode 100644 index 67f4255..0000000 --- a/dataclass_csv/decorators.py +++ /dev/null @@ -1,57 +0,0 @@ -from typing import Any, Callable, TypeVar, Type - -F = TypeVar("F", bound=Callable[..., Any]) - - -def dateformat(date_format: str) -> Callable[[F], F]: - """The dateformat decorator is used to specify the format - the `DataclassReader` should use when parsing datetime strings. 
- - Usage: - >>> from dataclasses import dataclass - >>> from datetime import datetime - >>> from dataclass_csv import dateformat - - >>> @dataclass - >>> @dateformat('%Y-%m-%d') - >>> class User: - >>> firstname: str - >>> lastname: str - >>> brithday: datetime - """ - - if not date_format or not isinstance(date_format, str): - raise ValueError("Invalid value for the date_format argument") - - def func(cls): - cls.__dateformat__ = date_format - return cls - - return func - - -def accept_whitespaces(_cls: Type[Any] = None) -> Callable[[F], F]: - """The accept_whitespaces decorator tells the `DataclassReader` - that `str` fields defined in the `dataclass` should accept - values containing only white spaces. - - Usage: - >>> from dataclasses import dataclass - >>> from dataclass_csv import accept_whitespaces - - >>> @dataclass - >>> @accept_whitespaces - >>> class User: - >>> firstname: str - >>> lastname: str - >>> brithday: datetime - """ - - def func(cls): - cls.__accept_whitespaces__ = True - return cls - - if _cls: - return func(_cls) - - return func diff --git a/dataclass_csv/decorators.pyi b/dataclass_csv/decorators.pyi deleted file mode 100644 index 3f0a372..0000000 --- a/dataclass_csv/decorators.pyi +++ /dev/null @@ -1,6 +0,0 @@ -from typing import Any, Callable, Type, TypeVar - -F = TypeVar("F", bound=Callable[..., Any]) - -def dateformat(date_format: str) -> Callable[[F], F]: ... -def accept_whitespaces(_cls: Type[Any] = ...) -> Callable[[F], F]: ... diff --git a/dataclass_csv/exceptions.py b/dataclass_csv/exceptions.py deleted file mode 100644 index 8e5ddf1..0000000 --- a/dataclass_csv/exceptions.py +++ /dev/null @@ -1,12 +0,0 @@ -from typing import Any - - -class CsvValueError(Exception): - """Error when a value in the CSV file cannot be parsed.""" - - def __init__(self, error: Any, line_number: int): - self.error: Any = error - self.line_number: int = line_number - - def __str__(self): - return f"{self.error} [CSV Line number: {self.line_number}]" diff --git a/dataclass_csv/exceptions.pyi b/dataclass_csv/exceptions.pyi deleted file mode 100644 index 7040a34..0000000 --- a/dataclass_csv/exceptions.pyi +++ /dev/null @@ -1,6 +0,0 @@ -from typing import Any - -class CsvValueError(Exception): - error: Any = ... - line_number: Any = ... - def __init__(self, error: Any, line_number: int) -> None: ... diff --git a/dataclass_csv/field_mapper.py b/dataclass_csv/field_mapper.py deleted file mode 100644 index 651d942..0000000 --- a/dataclass_csv/field_mapper.py +++ /dev/null @@ -1,18 +0,0 @@ -from typing import Callable - - -class FieldMapper: - """The `FieldMapper` class is used to explicitly map a field - in the CSV file to a specific `dataclass` field. - """ - - def __init__(self, callback: Callable[[str], None]): - def to(property_name: str) -> None: - """Specify the dataclass field to receive the value - :param property_name: The dataclass property that - will receive the csv value. - """ - - callback(property_name) - - self.to: Callable[[str], None] = to diff --git a/dataclass_csv/field_mapper.pyi b/dataclass_csv/field_mapper.pyi deleted file mode 100644 index 66a04a9..0000000 --- a/dataclass_csv/field_mapper.pyi +++ /dev/null @@ -1,5 +0,0 @@ -from typing import Any, Callable - -class FieldMapper: - to: Any = ... - def __init__(self, callback: Callable[[str], None]) -> None: ... 
diff --git a/dataclass_csv/header_mapper.py b/dataclass_csv/header_mapper.py deleted file mode 100644 index 5f03af7..0000000 --- a/dataclass_csv/header_mapper.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import Callable - - -class HeaderMapper: - """The `HeaderMapper` class is used to explicitly map property in a - dataclass to a header. Useful when the header on the CSV file needs to - be different from a dataclass property name. - """ - - def __init__(self, callback: Callable[[str], None]): - def to(header: str) -> None: - """Specify how a property in the dataclass will be - displayed in the CSV file - :param header: Specify the CSV title for the dataclass property - """ - - callback(header) - - self.to: Callable[[str], None] = to diff --git a/dataclass_csv/header_mapper.pyi b/dataclass_csv/header_mapper.pyi deleted file mode 100644 index c2cc1b3..0000000 --- a/dataclass_csv/header_mapper.pyi +++ /dev/null @@ -1,5 +0,0 @@ -from typing import Any, Callable - -class HeaderMapper: - to: Any = ... - def __init__(self, callback: Callable[[str], None]) -> None: ... diff --git a/pydantic_csv/__init__.py b/pydantic_csv/__init__.py new file mode 100644 index 0000000..77186e0 --- /dev/null +++ b/pydantic_csv/__init__.py @@ -0,0 +1,63 @@ +""" +pydantic_csv +~~~~~~~~~~~~~ + +The pydantic_csv is a library that parses every row of a CSV file into +`pydantic.BaseModels`. It takes advantage of `BaseModel` features to perform +data validation and type conversion. + +Basic Usage +~~~~~~~~~~~~~ + +Read data from a CSV file: + + >>> from pydantic import BaseModel + >>> from pydantic_csv import BasemodelCSVReader + + >>> class User(BaseModel): + >>> firstname: str + >>> lastname: str + >>> age: int + + >>> with open('users.csv') as csv: + >>> reader = BasemodelCSVReader(csv, User) + >>> users = list(reader) + >>> print(users) + [ + User(firstname='User1', lastname='Test', age=23), + User(firstname='User2', lastname='Test', age=34) + ] + +Write BaseModels to a CSV file: + + >>> from pydantic import BaseModel + >>> from pydantic_csv import BasemodelCSVWriter + + >>> class User(BaseModel): + >>> firstname: str + >>> lastname: str + >>> age: int + + >>> users = [ + >>> User(firstname='User1', lastname='Test', age=23), + >>> User(firstname='User2', lastname='Test', age=34) + >>> ] + + >>> with open('users.csv', 'w') as csv: + >>> writer = BasemodelCSVWriter(csv, users, User) + >>> writer.write() + + +:copyright: (c) 2024 by Nathan Richard. +:license: BSD, see LICENSE for more details. 
+""" + +from .basemodel_csv_reader import BasemodelCSVReader +from .basemodel_csv_writer import BasemodelCSVWriter +from .exceptions import CSVValueError + +__all__ = [ + "BasemodelCSVReader", + "BasemodelCSVWriter", + "CSVValueError", +] diff --git a/pydantic_csv/basemodel_csv_reader.py b/pydantic_csv/basemodel_csv_reader.py new file mode 100644 index 0000000..7735734 --- /dev/null +++ b/pydantic_csv/basemodel_csv_reader.py @@ -0,0 +1,178 @@ +""" +module containing the BasemodelCSVReader Class to read from a CSV file and the parse it into a pydantic.BaseModel +""" + +import csv +import typing +from collections import Counter +from collections.abc import Sequence +from typing import Any, Optional, Union + +import pydantic +from pydantic import BaseModel +from pydantic.fields import FieldInfo + +from .exceptions import CSVValueError +from .header_mapper import HeaderMapper + + +def _verify_duplicate_header_items(header: Sequence[str]) -> Optional[list[str]]: + if header is not None and len(header) == 0: + return + + header_counter = Counter(header) + duplicated = [k for k, v in header_counter.items() if v > 1] + + if len(duplicated) > 0: + raise ValueError( + f"It seems like the CSV file contain duplicated header values: {duplicated}. This may cause " + "inconsistent data. Use the kwarg validate_header=False when initializing the DataclassReader to skip " + "the header validation." + ) + + +def _is_union_type(t) -> bool: + return hasattr(t, "__origin__") and t.__origin__ is Union + + +def _get_args(t): + return getattr(t, "__args__", tuple()) + + +class BasemodelCSVReader: + """ + The Reader, which takes a file object and the BaseModel in which each row of the CSV should be parsed and returned + """ + + def __init__( + self, + file_obj: Any, + model: type[BaseModel], + *, + use_alias: bool = True, + validate_header: bool = True, + fieldnames: Optional[Sequence[str]] = None, + restkey: Optional[str] = None, + restval: Optional[Any] = None, + dialect: str = "excel", + **kwargs: Any, + ): + + if not file_obj: + raise ValueError("The 'file_obj' argument is required") + + if model is None or not issubclass(model, pydantic.BaseModel): + raise ValueError("cls argument needs to be a pydantic BaseModel.") + + self._model = model + self._use_alias = use_alias + self._optional_fields = self._get_optional_fields() + self._field_mapping: dict[str, str] = {} + + self._reader = csv.DictReader(file_obj, fieldnames, restkey, restval, dialect, **kwargs) + + if validate_header: + _verify_duplicate_header_items(self._reader.fieldnames) + + self.type_hints = typing.get_type_hints(model) + + def _get_optional_fields(self) -> list[str]: + if self._use_alias: + return [field.alias or name for name, field in self._model.model_fields.items() if not field.is_required()] + return [name for name, field in self._model.model_fields.items() if not field.is_required()] + + def _add_to_mapping(self, fieldname: str, csv_fieldname: str) -> None: + self._field_mapping[fieldname] = csv_fieldname + + @staticmethod + def _get_default_value(field: FieldInfo) -> Any: + if field.default_factory: + return field.default_factory() + return field.default + + @staticmethod + def _get_possible_keys(fieldname: str, row: dict) -> Optional[str]: + possible_keys = filter(lambda x: x.strip() == fieldname, row) + return next(possible_keys, None) + + def _get_value(self, row: dict, fieldname: str, field: FieldInfo) -> Any: + is_field_mapped = False + + if fieldname in self._field_mapping: + is_field_mapped = True + key = 
self._field_mapping.get(fieldname) + else: + key = fieldname + + try: + if key in row: + value = row[key] + else: + possible_key = self._get_possible_keys(fieldname, row) + key = possible_key if possible_key else key + value = row[key] + + except KeyError as e: + if fieldname in self._optional_fields: + return self._get_default_value(field) + + keyerror_message = f"The value for the column `{fieldname}`" + if is_field_mapped: + keyerror_message = f"The value for the mapped column `{key}`" + raise KeyError(f"{keyerror_message} is missing in the CSV file") from e + + if not value and fieldname in self._optional_fields: + return self._get_default_value(field) + + if not value and fieldname not in self._optional_fields: + raise ValueError(f"The field `{fieldname}` is required.") + + return value + + def _process_row(self, row: dict) -> BaseModel: + values = {} + + for name, field in self._model.model_fields.items(): + + if self._use_alias: + fieldname = field.alias or name + else: + fieldname = name + + try: + value = self._get_value(row, fieldname, field) + if not value and field.default is None: + values[fieldname] = None + else: + values[fieldname] = value + except ValueError as e: + raise CSVValueError(e, line_number=self._reader.line_num) from None + + try: + return self._model(**values) + except pydantic.ValidationError as e: + raise CSVValueError( + str(e), + line_number=self._reader.line_num, + ) from e + + def __next__(self): + row: dict = next(self._reader) + return self._process_row(row) + + def __iter__(self): + return self + + def map(self, header: str) -> HeaderMapper: + """ + reader.map("First Name").to("firstname") + Used to map Column name in the Header of the CSV file to a BaseModel field. + + Args: + header (str): The Column Name of the Header in the CSV file + + Returns: + HeaderMapper: HeaderMapper instance which then can be used to set the BaseModel field name + (.to("firstname")) + """ + return HeaderMapper(lambda fieldname: self._add_to_mapping(fieldname, header)) diff --git a/pydantic_csv/basemodel_csv_writer.py b/pydantic_csv/basemodel_csv_writer.py new file mode 100644 index 0000000..0396893 --- /dev/null +++ b/pydantic_csv/basemodel_csv_writer.py @@ -0,0 +1,101 @@ +""" +module containing the BasemodelCSVWriter Class to write from instances of a BaseModel to a CSV File +""" + +import csv +from typing import Any + +import pydantic +from pydantic import BaseModel + +from .header_mapper import HeaderMapper + + +class BasemodelCSVWriter: + """ + The Writer, which takes a file object and instances of BaseModels in order to write them to a CSV file + """ + + def __init__( + self, + file_obj: Any, + data: list[Any], + model: type[BaseModel], + *, + use_alias: bool = True, + dialect: str = "excel", + **kwargs: Any, + ): + if not file_obj: + raise ValueError("The 'file_obj' argument is required") + + if not isinstance(data, list): + raise ValueError("Invalid 'data' argument. It must be a list") + + if model is None or not issubclass(model, pydantic.BaseModel): + raise ValueError("Invalid 'cls' argument. 
It must be a pydantic BaseModel") + + self._data = data + self._model = model + self._field_mapping: dict[str, str] = {} + + if use_alias: + self._fieldnames = [field.alias or name for name, field in self._model.model_fields.items()] + else: + self._fieldnames = model.model_fields.keys() + + self._writer = csv.writer(file_obj, dialect=dialect, **kwargs) + + def _add_to_mapping(self, header: str, fieldname: str) -> None: + self._field_mapping[fieldname] = header + + def _apply_mapping(self) -> list[str]: + mapped_fields = [] + + for field in self._fieldnames: + mapped_item = self._field_mapping.get(field, field) + mapped_fields.append(mapped_item) + + return mapped_fields + + def write(self, skip_header: bool = False) -> None: + """ + Used to start the writing process. Afterward the provided data will be written to a CSV file. + **Important:** Remember to do all the mappings beforehand. Afterward it's too late. + + Args: + skip_header (bool): + + Returns: + None: well, nothing + """ + if not skip_header: + if self._field_mapping: + self._fieldnames = self._apply_mapping() + + self._writer.writerow(self._fieldnames) + + for item in self._data: + if not isinstance(item, self._model): + raise TypeError( + f"The item [{item}] is not an instance of " + f"{self._model.__name__}. All items on the list must be " + "instances of the same type" + ) + row = item.model_dump().values() + self._writer.writerow(row) + + def map(self, fieldname: str) -> HeaderMapper: + """ + writer.map("firstname").to("First Name") + Used to map a BaseModel field to Column name in the Header of the CSV file + **Important:** If not specifically set `writer = BasemodelCSVWriter(file_obj, data, BaseModel, + *use_alias=false*)` you have to use the alias name of the field. (Of course only if one is set) + + Args: + fieldname (str): The name of the BaseModel field to be mapped + + Returns: + HeaderMapper: HeaderMapper instance which then can be used to set the Header name (.to("First Name")) + """ + return HeaderMapper(lambda header: self._add_to_mapping(header, fieldname)) diff --git a/pydantic_csv/exceptions.py b/pydantic_csv/exceptions.py new file mode 100644 index 0000000..1ae3a25 --- /dev/null +++ b/pydantic_csv/exceptions.py @@ -0,0 +1,19 @@ +""" +model containing custom Exceptions +""" + +from typing import Any + + +class CSVValueError(Exception): + """ + Raised Exception if a problem with the value in the CSV file occurs. Also prints out the line where the problem + happened. + """ + + def __init__(self, error: Any, line_number: int): + self.error: Any = error + self.line_number: int = line_number + + def __str__(self): + return f"[Error on CSV Line number: {self.line_number}]\n{self.error}" diff --git a/pydantic_csv/header_mapper.py b/pydantic_csv/header_mapper.py new file mode 100644 index 0000000..46f88ad --- /dev/null +++ b/pydantic_csv/header_mapper.py @@ -0,0 +1,32 @@ +""" +This module contains the HeaderMapper which can be used to define different Headers than the standard with the Field +alias/name. +""" + +from typing import Callable + + +class HeaderMapper: + """ + The `HeaderMapper` class is used to explicitly map a field of the BaseModel to a Header. Useful when the header on + the CSV file needs to be different from a BaseModel field name. 
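+
+    A short usage sketch (mirroring the reader/writer ``map()`` docs); both ``BasemodelCSVReader.map()`` and
+    ``BasemodelCSVWriter.map()`` return a ``HeaderMapper``:
+
+        >>> reader.map("First Name").to("firstname")   # CSV header -> BaseModel field (reading)
+        >>> writer.map("firstname").to("First Name")   # BaseModel field -> CSV header (writing)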
+    """
+
+    def __init__(self, callback: Callable[[str], None]):
+        def to(name: str) -> None:
+            """
+            When writing:
+            Specify the CSV header that should be used instead of the BaseModel field name
+
+            When reading:
+            Specify the BaseModel field name that the CSV header should be mapped to
+            **Important:** Unless you set
+            `reader = BasemodelCSVReader(file_obj, BaseModel, use_alias=False)` you have to use the alias name of
+            the field (of course only if one is set)
+
+            Args:
+                name (str): The CSV header name (writing) or the BaseModel field name (reading)
+            """
+            callback(name)
+
+        self.to: Callable[[str], None] = to
diff --git a/dataclass_csv/py.typed b/pydantic_csv/py.typed
similarity index 100%
rename from dataclass_csv/py.typed
rename to pydantic_csv/py.typed
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..ddbfb4e
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,20 @@
+[tool.poetry]
+name = "pydantic-csv"
+version = "0.1.0"
+description = ""
+authors = ["Nathan Richard "]
+license = "LICENSE"
+readme = "README.md"
+
+[tool.poetry.dependencies]
+python = "^3.9"
+pydantic = "^2.7.4"
+
+[tool.poetry.group.dev.dependencies]
+pytest = "^8.2.2"
+pre-commit = "^3.7.1"
+
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
diff --git a/setup.cfg b/setup.cfg
deleted file mode 100644
index 43bd1f6..0000000
--- a/setup.cfg
+++ /dev/null
@@ -1,26 +0,0 @@
-[bumpversion]
-current_version = 1.4.0
-commit = True
-tag = True
-
-[bumpversion:file:setup.py]
-search = version='{current_version}'
-replace = version='{new_version}'
-
-[bumpversion:file:easycsv/__init__.py]
-search = __version__ = '{current_version}'
-replace = __version__ = '{new_version}'
-
-[bdist_wheel]
-universal = 1
-
-[flake8]
-exclude = docs
-max-line-length = 88
-
-[aliases]
-# Define setup.py command aliases here
-test = pytest
-
-[tool:pytest]
-collect_ignore = ['setup.py']
diff --git a/setup.py b/setup.py
deleted file mode 100644
index b998ace..0000000
--- a/setup.py
+++ /dev/null
@@ -1,56 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-"""The setup script."""
-
-from setuptools import setup, find_packages
-
-with open("README.md") as readme_file:
-    readme = readme_file.read()
-
-with open("HISTORY.md") as history_file:
-    history = history_file.read()
-
-requirements = []
-
-setup_requirements = [
-    "pytest-runner",
-]
-
-test_requirements = [
-    "pytest",
-]
-
-setup(
-    author="Daniel Furtado",
-    author_email="daniel@dfurtado.com",
-    classifiers=[
-        "Development Status :: 5 - Production/Stable",
-        "Intended Audience :: Developers",
-        "License :: OSI Approved :: BSD License",
-        "Natural Language :: English",
-        "Programming Language :: Python :: 3 :: Only",
-        "Programming Language :: Python :: 3.7",
-        "Operating System :: Microsoft :: Windows",
-        "Operating System :: MacOS :: MacOS X",
-        "Operating System :: Unix",
-        "Operating System :: POSIX",
-        "Environment :: Console",
-    ],
-    description="Map CSV data into dataclasses",
-    install_requires=requirements,
-    license="BSD license",
-    long_description=readme + "\n\n" + history,
-    long_description_content_type="text/markdown",
-    include_package_data=True,
-    keywords="dataclass dataclasses csv dataclass-csv",
-    name="dataclass-csv",
-    packages=find_packages(include=["dataclass_csv"]),
-    package_data={"dataclass_csv": ["py.typed", "*.pyi"]},
-    setup_requires=setup_requirements,
-    test_suite="tests",
-    tests_require=test_requirements,
-    url="https://github.com/dfurtado/dataclass-csv",
-    version="1.4.0",
-    zip_safe=False,
-)
diff
--git a/tests/conftest.py b/tests/conftest.py
index 6190666..6ef201d 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,26 +1,112 @@
-from csv import DictWriter
+import datetime
+import io
+import pathlib
 
 import pytest
 
+from pydantic_csv import BasemodelCSVReader, BasemodelCSVWriter
 
-@pytest.fixture()
-def create_csv(tmpdir_factory):
-    def func(data, fieldnames=None, filename="user.csv", factory=tmpdir_factory):
+from .models import DefaultFactory, LotsOfDates, User, UserOptional
 
-        assert data
-        file = tmpdir_factory.mktemp("data").join(filename)
+
+@pytest.fixture
+def files_path() -> pathlib.Path:
+    tests_folder: pathlib.Path = pathlib.Path(__file__).resolve().parent
+    return tests_folder / "mocks"
 
-        row = data[0] if isinstance(data, list) else data
-        header = fieldnames if fieldnames is not None else row.keys()
+
+@pytest.fixture
+def user_list():
+    return [
+        User(
+            id=13,
+            firstname="Emily",
+            lastname="Johnson",
+            birthday=datetime.date(year=1991, month=10, day=12),
+            signed_up=True,
+        ),
+        User(id=14, firstname="Benjamin", lastname="Adams", birthday=datetime.date(year=1986, month=3, day=29)),
+        User(
+            id=15,
+            firstname="Olivia",
+            lastname="Anderson",
+            birthday=datetime.date(year=2000, month=8, day=3),
+            signed_up=True,
+        ),
+    ]
 
-        with file.open("w") as f:
-            writer = DictWriter(f, fieldnames=header)
-            writer.writeheader()
-            addrow = writer.writerows if isinstance(data, list) else writer.writerow
-            addrow(data)
-            return file
+
+@pytest.fixture
+def users_as_csv_buffer(user_list):
+    buffer: io.StringIO = io.StringIO()
 
-    return func
+    writer = BasemodelCSVWriter(buffer, user_list, User)
+    writer.write()
+
+    buffer.seek(0)
+    return buffer.read()
+
+
+@pytest.fixture
+def users_mapped_as_csv_buffer(user_list):
+    buffer: io.StringIO = io.StringIO()
+
+    writer = BasemodelCSVWriter(buffer, user_list, User)
+    writer.map("firstname").to("First Name")
+    writer.map("lastname").to("Last Name")
+    writer.map("birthday").to("Birthday")
+    writer.write()
+
+    buffer.seek(0)
+    return buffer.read()
+
+
+@pytest.fixture
+def users_from_csv(files_path):
+    with open(files_path / "users.csv", newline="") as csv:
+        value = csv.read()
+
+    return value
+
+
+@pytest.fixture
+def users_mapped_from_csv(files_path):
+    with open(files_path / "users_mapped.csv", newline="") as csv:
+        value = csv.read()
+
+    return value
+
+
+@pytest.fixture
+def users_read_from_csv(files_path) -> list[User]:
+    with open(files_path / "users.csv", newline="") as csv:
+        reader = BasemodelCSVReader(csv, User)
+        return list(reader)
+
+
+@pytest.fixture
+def users_read_from_csv_with_spaces(files_path) -> list[User]:
+    with open(files_path / "users_space_in_header.csv", newline="") as csv:
+        reader = BasemodelCSVReader(csv, User)
+        return list(reader)
+
+
+@pytest.fixture
+def users_read_from_csv_optional(files_path):
+    with open(files_path / "users_optional.csv", newline="") as csv:
+        reader = BasemodelCSVReader(csv, UserOptional)
+        return list(reader)
+
+
+@pytest.fixture
+def model_from_csv_default_factory(files_path):
+    with open(files_path / "default_factory.csv", newline="") as csv:
+        reader = BasemodelCSVReader(csv, DefaultFactory)
+        return list(reader)
+
+
+@pytest.fixture
+def dates_from_csv(files_path):
+    with open(files_path / "dates.csv", newline="") as csv:
+        reader = BasemodelCSVReader(csv, LotsOfDates, delimiter=";")
+        return list(reader)
diff --git a/tests/mocks.py b/tests/mocks.py
deleted file mode 100644
index 9d123f8..0000000
--- a/tests/mocks.py
+++ /dev/null
@@ -1,137 +0,0 @@
-import dataclasses
-import re - -from datetime import date, datetime - -from dataclass_csv import dateformat, accept_whitespaces - -from typing import Optional - - -@dataclasses.dataclass -class User: - name: str - age: int - - -@dataclasses.dataclass -class SimpleUser: - name: str - - -class NonDataclassUser: - name: str - - -@dataclasses.dataclass -class UserWithoutAcceptWhiteSpacesDecorator: - name: str - - -@accept_whitespaces -@dataclasses.dataclass -class UserWithAcceptWhiteSpacesDecorator: - name: str - - -@dataclasses.dataclass -class UserWithAcceptWhiteSpacesMetadata: - name: str = dataclasses.field(metadata={"accept_whitespaces": True}) - - -@dataclasses.dataclass -class UserWithoutDateFormatDecorator: - name: str - create_date: datetime - - -@dateformat("%Y-%m-%d") -@dataclasses.dataclass -class UserWithDateFormatDecorator: - name: str - create_date: datetime - - -@dateformat("%Y-%m-%d") -@dataclasses.dataclass -class UserWithDateFormatDecoratorAndDateField: - name: str - create_date: date - - -@dataclasses.dataclass -class UserWithDateFormatMetadata: - name: str - create_date: datetime = dataclasses.field(metadata={"dateformat": "%Y-%m-%d"}) - - -@dateformat("%Y-%m-%d") -@dataclasses.dataclass -class UserWithDateFormatDecoratorAndMetadata: - name: str - birthday: datetime - create_date: datetime = dataclasses.field(metadata={"dateformat": "%Y-%m-%d %H:%M"}) - - -@dataclasses.dataclass -class DataclassWithBooleanValue: - boolValue: bool - - -@dataclasses.dataclass -class DataclassWithBooleanValueNoneDefault: - boolValue: Optional[bool] = None - - -@dataclasses.dataclass -class UserWithInitFalse: - firstname: str - lastname: str - age: int = dataclasses.field(init=False) - - -@dataclasses.dataclass -class UserWithInitFalseAndDefaultValue: - firstname: str - lastname: str - age: int = dataclasses.field(init=False, default=0) - - -@dataclasses.dataclass -class UserWithOptionalAge: - name: str - age: Optional[int] - - -@dataclasses.dataclass -class UserWithDefaultDatetimeField: - name: str - birthday: datetime = datetime.now() - - -class SSN: - def __init__(self, val): - if re.match(r"\d{9}", val): - self.val = f"{val[0:3]}-{val[3:5]}-{val[5:9]}" - elif re.match(r"\d{3}-\d{2}-\d{4}", val): - self.val = val - else: - raise ValueError(f"Invalid SSN: {val!r}") - - -@dataclasses.dataclass -class UserWithSSN: - name: str - ssn: SSN - - -@dataclasses.dataclass -class UserWithEmail: - name: str - email: str - - -@dataclasses.dataclass -class UserWithOptionalEmail: - name: str - email: str = "not specified" diff --git a/tests/mocks/dates.csv b/tests/mocks/dates.csv new file mode 100644 index 0000000..48cb729 --- /dev/null +++ b/tests/mocks/dates.csv @@ -0,0 +1,3 @@ +start;end;timestamp +Sunday, 6. January 2002;10.01.2002;1010620800 +Tuesday, 5. 
August 1997;09.08.1997;1679616000.0 diff --git a/tests/mocks/default_factory.csv b/tests/mocks/default_factory.csv new file mode 100644 index 0000000..a150d4e --- /dev/null +++ b/tests/mocks/default_factory.csv @@ -0,0 +1,3 @@ +firstname,created +Olivia, +John,2010-12-21 12:34 diff --git a/tests/mocks/users.csv b/tests/mocks/users.csv new file mode 100644 index 0000000..7b824cc --- /dev/null +++ b/tests/mocks/users.csv @@ -0,0 +1,4 @@ +id,firstname,lastname,birthday,signed_up +13,Emily,Johnson,1991-10-12,True +14,Benjamin,Adams,1986-03-29,False +15,Olivia,Anderson,2000-08-03,True diff --git a/tests/mocks/users_duplicate_header.csv b/tests/mocks/users_duplicate_header.csv new file mode 100644 index 0000000..66c8cf5 --- /dev/null +++ b/tests/mocks/users_duplicate_header.csv @@ -0,0 +1,4 @@ +id,firstname,lastname,birthday,signed_up,lastname +13,Emily,,1991-10-12,True,Johnson +14,Benjamin,,1986-03-29,False,Adams +15,Olivia,,2000-08-03,True,Anderson diff --git a/tests/mocks/users_empty_spaces.csv b/tests/mocks/users_empty_spaces.csv new file mode 100644 index 0000000..d8666b2 --- /dev/null +++ b/tests/mocks/users_empty_spaces.csv @@ -0,0 +1,3 @@ +firstname,lastname + ,Johnson +Benjamin, diff --git a/tests/mocks/users_mapped.csv b/tests/mocks/users_mapped.csv new file mode 100644 index 0000000..f9c93f2 --- /dev/null +++ b/tests/mocks/users_mapped.csv @@ -0,0 +1,4 @@ +id,First Name,Last Name,Birthday,signed_up +13,Emily,Johnson,1991-10-12,True +14,Benjamin,Adams,1986-03-29,False +15,Olivia,Anderson,2000-08-03,True diff --git a/tests/mocks/users_optional.csv b/tests/mocks/users_optional.csv new file mode 100644 index 0000000..e59c04a --- /dev/null +++ b/tests/mocks/users_optional.csv @@ -0,0 +1,3 @@ +firstname,age,created +Olivia,22, +Benjamin,,2010-12-21 12:34 diff --git a/tests/mocks/users_space_in_header.csv b/tests/mocks/users_space_in_header.csv new file mode 100644 index 0000000..5d981f6 --- /dev/null +++ b/tests/mocks/users_space_in_header.csv @@ -0,0 +1,4 @@ + id,firstname , lastname ,birthday, signed_up +13,Emily,Johnson,1991-10-12,True +14,Benjamin,Adams,1986-03-29,False +15,Olivia,Anderson,2000-08-03,True diff --git a/tests/mocks/users_wrong_type.csv b/tests/mocks/users_wrong_type.csv new file mode 100644 index 0000000..220d47d --- /dev/null +++ b/tests/mocks/users_wrong_type.csv @@ -0,0 +1,3 @@ +firstname,age +Olivia,22 +Benjamin,not a number diff --git a/tests/models.py b/tests/models.py new file mode 100644 index 0000000..bce8c3c --- /dev/null +++ b/tests/models.py @@ -0,0 +1,53 @@ +from datetime import date, datetime +from typing import Optional + +import pydantic +from pydantic import Field + + +class User(pydantic.BaseModel): + id: int + firstname: str + lastname: str + birthday: date + signed_up: bool = False + + +class SimpleUser(pydantic.BaseModel): + firstname: str + lastname: str + + +class NonBaseModelUser: + firstname: str + lastname: str + + +class Boolean(pydantic.BaseModel): + true: Optional[bool] = None + false: Optional[bool] = None + + +class UserOptional(pydantic.BaseModel): + firstname: str = "John" + age: Optional[int] = None + created: Optional[datetime] = datetime(year=2002, month=2, day=20, hour=22, minute=22) + + +class DefaultFactory(pydantic.BaseModel): + firstname: str + created: Optional[datetime] = Field(default_factory=datetime.now) + + +class LotsOfDates(pydantic.BaseModel): + start: date + end: date + timestamp: datetime + + @pydantic.field_validator("start", mode="before") + def parse_start_date(cls, value): + return datetime.strptime(value, "%A, %d. 
%B %Y").date() + + @pydantic.field_validator("end", mode="before") + def parse_end_date(cls, value): + return datetime.strptime(value, "%d.%m.%Y").date() diff --git a/tests/test_basemodel_csv_reader.py b/tests/test_basemodel_csv_reader.py new file mode 100644 index 0000000..4cc9bc3 --- /dev/null +++ b/tests/test_basemodel_csv_reader.py @@ -0,0 +1,111 @@ +import io +from datetime import date, datetime, timezone + +import pytest + +from pydantic_csv import BasemodelCSVReader, CSVValueError + +from .models import NonBaseModelUser, SimpleUser, User, UserOptional + + +def test_reader_with_non_dataclass(): + with pytest.raises(ValueError): + BasemodelCSVReader(io.StringIO(), NonBaseModelUser) + + +def test_reader_with_none_class(): + with pytest.raises(ValueError): + BasemodelCSVReader(io.StringIO(), None) + + +def test_reader_with_none_file(): + with pytest.raises(ValueError): + BasemodelCSVReader(None, User) + + +def test_reader_with_correct_values(users_read_from_csv, user_list): + assert users_read_from_csv == user_list + + +def test_reader_with_correct_values_spaces_in_header(users_read_from_csv_with_spaces, user_list): + assert users_read_from_csv_with_spaces == user_list + + +def test_reader_with_optional_types(users_read_from_csv_optional): + user1, user2 = users_read_from_csv_optional[0], users_read_from_csv_optional[1] + + assert user1.firstname == "Olivia" + assert user1.age == 22 + assert user1.created == datetime(year=2002, month=2, day=20, hour=22, minute=22) + + assert user2.firstname == "Benjamin" + assert user2.age is None + assert user2.created == datetime(year=2010, month=12, day=21, hour=12, minute=34) + + +def test_raise_error_when_mapped_column_not_found(files_path): + with open(files_path / "users_mapped.csv", newline="") as user_csv: + with pytest.raises(KeyError, match="The value for the mapped column `Surname` is missing in the CSV file"): + reader = BasemodelCSVReader(user_csv, User) + reader.map("First Name").to("firstname") + reader.map("Surname").to("lastname") + reader.map("Birthday").to("birthday") + list(reader) + + +def test_raise_error_when_field_not_found(files_path): + with open(files_path / "users_mapped.csv", newline="") as user_csv: + with pytest.raises(KeyError, match="The value for the column `birthday` is missing in the CSV file"): + reader = BasemodelCSVReader(user_csv, User) + reader.map("First Name").to("firstname") + reader.map("Last Name").to("lastname") + list(reader) + + +def test_raise_error_when_duplicate_header_items(files_path): + with open(files_path / "users_duplicate_header.csv", newline="") as user_csv: + with pytest.raises(ValueError): + reader = BasemodelCSVReader(user_csv, User) + list(reader) + + +def test_skip_header_validation(files_path, user_list): + """ + Please note that the values used for the pydantic.BaseModel initialisation on duplicate header is the later one. + So, if the first column has the correct values and the second is empty it will raise an error because a required + field is empty. 
+ """ + with open(files_path / "users_duplicate_header.csv", newline="") as user_csv: + reader = BasemodelCSVReader(user_csv, User, validate_header=False) + assert list(reader) == user_list + + +def test_default_factory(model_from_csv_default_factory): + for item in model_from_csv_default_factory: + assert isinstance(item.created, datetime) + + +def test_custom_dateformats(dates_from_csv): + date1, date2 = dates_from_csv[0], dates_from_csv[1] + + assert date1.start == date(year=2002, month=1, day=6) + assert date1.end == date(year=2002, month=1, day=10) + assert date1.timestamp == datetime(year=2002, month=1, day=10, hour=0, minute=0, tzinfo=timezone.utc) + + assert date2.start == date(year=1997, month=8, day=5) + assert date2.end == date(year=1997, month=8, day=9) + assert date2.timestamp == datetime(year=2023, month=3, day=24, hour=0, minute=0, tzinfo=timezone.utc) + + +def test_should_raise_error_str_to_int(files_path): + with open(files_path / "users_wrong_type.csv", newline="") as user_csv: + with pytest.raises(CSVValueError): + reader = BasemodelCSVReader(user_csv, UserOptional) + list(reader) + + +def test_should_raise_error_when_required_value_is_empty_spaces(files_path): + with open(files_path / "users_empty_spaces.csv", newline="") as user_csv: + with pytest.raises(CSVValueError): + reader = BasemodelCSVReader(user_csv, SimpleUser) + list(reader) diff --git a/tests/test_basemodel_csv_writer.py b/tests/test_basemodel_csv_writer.py new file mode 100644 index 0000000..aea54bc --- /dev/null +++ b/tests/test_basemodel_csv_writer.py @@ -0,0 +1,52 @@ +import io + +import pytest + +from pydantic_csv import BasemodelCSVWriter + +from .models import NonBaseModelUser, SimpleUser, User + + +def test_create_csv_file(users_as_csv_buffer, users_from_csv): + assert users_as_csv_buffer == users_from_csv + + +def test_wrong_type_items(user_list): + with pytest.raises(TypeError): + w = BasemodelCSVWriter(io.StringIO(), user_list, SimpleUser) + w.write() + + +def test_with_a_non_dataclass(user_list): + with pytest.raises(ValueError): + w = BasemodelCSVWriter(io.StringIO(), user_list, NonBaseModelUser) + w.write() + + +def test_with_a_empty_cls_value(user_list): + with pytest.raises(ValueError): + w = BasemodelCSVWriter(io.StringIO(), user_list, None) + w.write() + + +def test_invalid_file_value(user_list): + with pytest.raises(ValueError): + w = BasemodelCSVWriter(None, user_list, User) + w.write() + + +def test_with_data_not_a_list(user_list): + with pytest.raises(ValueError): + w = BasemodelCSVWriter(io.StringIO(), user_list[0], User) + w.write() + + +def test_with_wrong_type_in_list(user_list): + user_list.append(SimpleUser(firstname="Emily", lastname="Johnson")) + with pytest.raises(TypeError): + w = BasemodelCSVWriter(io.StringIO(), user_list, User) + w.write() + + +def test_header_mapping(users_mapped_as_csv_buffer, users_mapped_from_csv): + assert users_mapped_as_csv_buffer == users_mapped_from_csv diff --git a/tests/test_csv_data_validation.py b/tests/test_csv_data_validation.py deleted file mode 100644 index 34c81f5..0000000 --- a/tests/test_csv_data_validation.py +++ /dev/null @@ -1,89 +0,0 @@ -import pytest - -from dataclass_csv import DataclassReader, CsvValueError - -from .mocks import User, UserWithDateFormatDecorator, UserWithSSN - - -def test_should_raise_error_str_to_int_prop(create_csv): - csv_file = create_csv({"name": "User1", "age": "wrong type"}) - - with csv_file.open() as f: - with pytest.raises(CsvValueError): - reader = DataclassReader(f, User) - list(reader) - - -def 
test_should_raise_error_with_incorrect_dateformat(create_csv): - csv_file = create_csv({"name": "User1", "create_date": "2018-12-07 10:00"}) - - with csv_file.open() as f: - with pytest.raises(CsvValueError): - reader = DataclassReader(f, UserWithDateFormatDecorator) - list(reader) - - -def test_should_raise_error_when_required_value_is_missing(create_csv): - csv_file = create_csv({"name": "User1", "age": None}) - - with csv_file.open() as f: - with pytest.raises(CsvValueError): - reader = DataclassReader(f, User) - list(reader) - - -def test_should_raise_error_when_required_column_is_missing(create_csv): - csv_file = create_csv({"name": "User1"}) - - with csv_file.open() as f: - with pytest.raises(KeyError): - reader = DataclassReader(f, User) - list(reader) - - -def test_should_raise_error_when_required_value_is_emptyspaces(create_csv): - csv_file = create_csv({"name": " ", "age": 40}) - - with csv_file.open() as f: - with pytest.raises(CsvValueError): - reader = DataclassReader(f, User) - list(reader) - - -def test_csv_header_items_with_spaces_with_missing_props_raises_keyerror(create_csv): - csv_file = create_csv({" name": "User1"}) - - with csv_file.open() as f: - with pytest.raises(KeyError): - reader = DataclassReader(f, User) - list(reader) - - -def test_csv_header_items_with_spaces_with_missing_value(create_csv): - csv_file = create_csv({" name": "User1", "age ": None}) - - with csv_file.open() as f: - with pytest.raises(CsvValueError): - reader = DataclassReader(f, User) - list(reader) - - -def test_csv_header_items_with_spaces_with_prop_with_wrong_type(create_csv): - csv_file = create_csv({" name": "User1", "age ": "this should be an int"}) - - with csv_file.open() as f: - with pytest.raises(CsvValueError): - reader = DataclassReader(f, User) - list(reader) - - -def test_passes_through_exceptions_from_user_defined_types(create_csv): - csv_file = create_csv({"name": "User1", "ssn": "123-45-678"}) - - with csv_file.open() as f: - with pytest.raises(CsvValueError) as exc_info: - reader = DataclassReader(f, UserWithSSN) - list(reader) - cause = exc_info.value.__cause__ - assert isinstance(cause, ValueError) - assert "Invalid SSN" in str(cause) diff --git a/tests/test_dataclass_reader.py b/tests/test_dataclass_reader.py deleted file mode 100644 index 2c3df49..0000000 --- a/tests/test_dataclass_reader.py +++ /dev/null @@ -1,296 +0,0 @@ -import pytest -import dataclasses - -from datetime import date, datetime -from dataclass_csv import DataclassReader, CsvValueError - -from .mocks import ( - User, - UserWithOptionalAge, - DataclassWithBooleanValue, - DataclassWithBooleanValueNoneDefault, - UserWithInitFalse, - UserWithInitFalseAndDefaultValue, - UserWithDefaultDatetimeField, - UserWithDateFormatDecoratorAndDateField, - UserWithSSN, - SSN, - UserWithEmail, - UserWithOptionalEmail, -) - - -def test_reader_with_non_dataclass(create_csv): - csv_file = create_csv({"name": "User1", "age": 40}) - - class DummyUser: - pass - - with csv_file.open() as f: - with pytest.raises(ValueError): - DataclassReader(f, DummyUser) - - -def test_reader_with_none_class(create_csv): - csv_file = create_csv({"name": "User1", "age": 40}) - - with csv_file.open() as f: - with pytest.raises(ValueError): - DataclassReader(f, None) - - -def test_reader_with_none_file(): - with pytest.raises(ValueError): - DataclassReader(None, User) - - -def test_reader_with_correct_values(create_csv): - csv_file = create_csv({"name": "User", "age": 40}) - - with csv_file.open() as f: - reader = DataclassReader(f, User) - 
list(reader) - - -def test_reader_values(create_csv): - csv_file = create_csv([{"name": "User1", "age": 40}, {"name": "User2", "age": 30}]) - - with csv_file.open() as f: - reader = DataclassReader(f, User) - items = list(reader) - - assert items and len(items) == 2 - - for item in items: - assert dataclasses.is_dataclass(item) - - user1, user2 = items[0], items[1] - - assert user1.name == "User1" - assert user1.age == 40 - - assert user2.name == "User2" - assert user2.age == 30 - - -def test_csv_header_items_with_spaces(create_csv): - csv_file = create_csv({" name": "User1", "age ": 40}) - - with csv_file.open() as f: - reader = DataclassReader(f, User) - items = list(reader) - - assert items and len(items) > 0 - - user = items[0] - - assert user.name == "User1" - assert user.age == 40 - - -def test_csv_header_items_with_spaces_together_with_skipinitialspaces(create_csv): - csv_file = create_csv({" name": "User1", "age ": 40}) - - with csv_file.open() as f: - reader = DataclassReader(f, User, skipinitialspace=True) - items = list(reader) - - assert items and len(items) > 0 - - user = items[0] - - assert user.name == "User1" - assert user.age == 40 - - -def test_parse_bool_value_true(create_csv): - for true_value in ["yes", "true", "t", "y", "1"]: - csv_file = create_csv({"boolValue": f"{true_value}"}) - with csv_file.open() as f: - reader = DataclassReader(f, DataclassWithBooleanValue) - items = list(reader) - dataclass_instance = items[0] - assert dataclass_instance.boolValue is True - - -def test_parse_bool_value_false(create_csv): - for false_value in ["no", "false", "f", "n", "0"]: - csv_file = create_csv({"boolValue": f"{false_value}"}) - with csv_file.open() as f: - reader = DataclassReader(f, DataclassWithBooleanValue) - items = list(reader) - dataclass_instance = items[0] - assert dataclass_instance.boolValue is False - - -def test_parse_bool_value_invalid(create_csv): - csv_file = create_csv({"boolValue": "notValidBoolean"}) - with csv_file.open() as f: - with pytest.raises(CsvValueError): - reader = DataclassReader(f, DataclassWithBooleanValue) - list(reader) - - -def test_parse_bool_value_none_default(create_csv): - csv_file = create_csv({"boolValue": ""}) - with csv_file.open() as f: - reader = DataclassReader(f, DataclassWithBooleanValueNoneDefault) - items = list(reader) - dataclass_instance = items[0] - assert dataclass_instance.boolValue is None - - -def test_skip_dataclass_field_when_init_is_false(create_csv): - csv_file = create_csv({"firstname": "User1", "lastname": "TestUser"}) - with csv_file.open() as f: - reader = DataclassReader(f, UserWithInitFalse) - list(reader) - - -def test_try_to_access_not_initialized_prop_raise_attr_error(create_csv): - csv_file = create_csv({"firstname": "User1", "lastname": "TestUser"}) - with csv_file.open() as f: - reader = DataclassReader(f, UserWithInitFalse) - items = list(reader) - with pytest.raises(AttributeError): - user = items[0] - assert user.age is not None - - -def test_try_to_access_not_initialized_prop_with_default_value(create_csv): - csv_file = create_csv({"firstname": "User1", "lastname": "TestUser"}) - with csv_file.open() as f: - reader = DataclassReader(f, UserWithInitFalseAndDefaultValue) - items = list(reader) - user = items[0] - assert user.age == 0 - - -def test_reader_with_optional_types(create_csv): - csv_file = create_csv({"name": "User", "age": 40}) - - with csv_file.open() as f: - reader = DataclassReader(f, UserWithOptionalAge) - list(reader) - - -def test_reader_with_datetime_default_value(create_csv): - 
csv_file = create_csv({"name": "User", "birthday": ""}) - - with csv_file.open() as f: - reader = DataclassReader(f, UserWithDefaultDatetimeField) - items = list(reader) - assert len(items) > 0 - assert isinstance(items[0].birthday, datetime) - - -def test_reader_with_date(create_csv): - csv_file = create_csv({"name": "User", "create_date": "2019-01-01"}) - - with csv_file.open() as f: - reader = DataclassReader(f, UserWithDateFormatDecoratorAndDateField) - items = list(reader) - assert len(items) > 0 - assert isinstance(items[0].create_date, date) - assert items[0].create_date == date(2019, 1, 1) - - -def test_should_parse_user_defined_types(create_csv): - csv_file = create_csv( - [ - {"name": "User1", "ssn": "123-45-6789"}, - {"name": "User1", "ssn": "123456789"}, - ] - ) - - with csv_file.open() as f: - reader = DataclassReader(f, UserWithSSN) - items = list(reader) - assert len(items) == 2 - - assert isinstance(items[0].ssn, SSN) - assert items[0].ssn.val == "123-45-6789" - - assert isinstance(items[1].ssn, SSN) - assert items[1].ssn.val == "123-45-6789" - - -def test_raise_error_when_mapped_column_not_found(create_csv): - csv_file = create_csv({"name": "User1", "e-mail": "test@test.com"}) - - with csv_file.open() as f: - with pytest.raises( - KeyError, - match="The value for the mapped column `e_mail` is missing in the CSV file", - ): - reader = DataclassReader(f, UserWithEmail) - reader.map("e_mail").to("email") - list(reader) - - -def test_raise_error_when_field_not_found(create_csv): - csv_file = create_csv({"name": "User1", "e-mail": "test@test.com"}) - - with csv_file.open() as f: - with pytest.raises( - KeyError, - match="The value for the column `email` is missing in the CSV file.", - ): - reader = DataclassReader(f, UserWithEmail) - list(reader) - - -def test_raise_error_when_duplicate_header_items(create_csv): - csv_file = create_csv( - {"name": "User1", "email": "test@test.com"}, - fieldnames=["name", "email", "name"], - ) - - with csv_file.open() as f: - with pytest.raises(ValueError): - reader = DataclassReader(f, UserWithEmail) - list(reader) - - -def test_skip_header_validation(create_csv): - csv_file = create_csv( - {"name": "User1", "email": "test@test.com"}, - fieldnames=["name", "email", "name"], - ) - - with csv_file.open() as f: - reader = DataclassReader(f, UserWithEmail, validate_header=False) - list(reader) - - -def test_dt_different_order_as_csv(create_csv): - csv_file = create_csv( - {"email": "test@test.com", "name": "User1"}, - fieldnames=[ - "email", - "name", - ], - ) - - with csv_file.open() as f: - reader = DataclassReader(f, UserWithEmail) - list(reader) - - -def test_dt_different_order_as_csv_and_option_field(create_csv): - data = [ - {"email": "test@test.com", "name": "User1"}, - {"name": "User1"}, - ] - - csv_file = create_csv( - data, - fieldnames=[ - "email", - "name", - ], - ) - - with csv_file.open() as f: - reader = DataclassReader(f, UserWithOptionalEmail) - list(reader) diff --git a/tests/test_dataclass_writer.py b/tests/test_dataclass_writer.py deleted file mode 100644 index 4b54517..0000000 --- a/tests/test_dataclass_writer.py +++ /dev/null @@ -1,72 +0,0 @@ -import pytest - -from dataclass_csv import DataclassWriter, DataclassReader - -from .mocks import User, SimpleUser, NonDataclassUser - - -def test_create_csv_file(tmpdir_factory): - tempfile = tmpdir_factory.mktemp("data").join("user_001.csv") - - users = [User(name="test", age=40)] - - with tempfile.open("w") as f: - w = DataclassWriter(f, users, User) - w.write() - - with 
tempfile.open() as f: - reader = DataclassReader(f, User) - saved_users = list(reader) - - assert len(saved_users) > 0 - assert saved_users[0].name == users[0].name - - -def test_wrong_type_items(tmpdir_factory): - tempfile = tmpdir_factory.mktemp("data").join("user_001.csv") - - users = [User(name="test", age=40)] - - with tempfile.open("w") as f: - with pytest.raises(TypeError): - w = DataclassWriter(f, users, SimpleUser) - w.write() - - -def test_with_a_non_dataclass(tmpdir_factory): - tempfile = tmpdir_factory.mktemp("data").join("user_001.csv") - - users = [User(name="test", age=40)] - - with tempfile.open("w") as f: - with pytest.raises(ValueError): - DataclassWriter(f, users, NonDataclassUser) - - -def test_with_a_empty_cls_value(tmpdir_factory): - tempfile = tmpdir_factory.mktemp("data").join("user_001.csv") - - users = [User(name="test", age=40)] - - with tempfile.open("w") as f: - with pytest.raises(ValueError): - DataclassWriter(f, users, None) - - -def test_invalid_file_value(tmpdir_factory): - tmpdir_factory.mktemp("data").join("user_001.csv") - - users = [User(name="test", age=40)] - - with pytest.raises(ValueError): - DataclassWriter(None, users, User) - - -def test_with_data_not_a_list(tmpdir_factory): - tempfile = tmpdir_factory.mktemp("data").join("user_001.csv") - - users = User(name="test", age=40) - - with tempfile.open("w") as f: - with pytest.raises(ValueError): - DataclassWriter(f, users, User) diff --git a/tests/test_decorators.py b/tests/test_decorators.py deleted file mode 100644 index e74ac86..0000000 --- a/tests/test_decorators.py +++ /dev/null @@ -1,83 +0,0 @@ -import pytest - -from dataclass_csv import DataclassReader, CsvValueError - -from .mocks import ( - UserWithoutDateFormatDecorator, - UserWithDateFormatDecorator, - UserWithDateFormatMetadata, - UserWithDateFormatDecoratorAndMetadata, - UserWithoutAcceptWhiteSpacesDecorator, - UserWithAcceptWhiteSpacesDecorator, - UserWithAcceptWhiteSpacesMetadata, -) - - -def test_should_raise_error_without_dateformat(create_csv): - csv_file = create_csv({"name": "Test", "create_date": "2018-12-09"}) - - with csv_file.open("r") as f: - with pytest.raises(AttributeError): - reader = DataclassReader(f, UserWithoutDateFormatDecorator) - list(reader) - - -def test_shold_not_raise_error_when_using_dateformat_decorator(create_csv): - csv_file = create_csv({"name": "Test", "create_date": "2018-12-09"}) - - with csv_file.open("r") as f: - reader = DataclassReader(f, UserWithDateFormatDecorator) - list(reader) - - -def test_shold_not_raise_error_when_dateformat_metadata(create_csv): - csv_file = create_csv({"name": "Test", "create_date": "2018-12-09"}) - - with csv_file.open("r") as f: - reader = DataclassReader(f, UserWithDateFormatMetadata) - list(reader) - - -def test_use_decorator_when_metadata_is_not_defined(create_csv): - csv_file = create_csv( - { - "name": "Test", - "birthday": "1977-08-26", - "create_date": "2018-12-09 11:11", - } - ) - - with csv_file.open("r") as f: - reader = DataclassReader(f, UserWithDateFormatDecoratorAndMetadata) - list(reader) - - -def test_should_raise_error_when_value_is_whitespaces(create_csv): - csv_file = create_csv({"name": " "}) - - with csv_file.open("r") as f: - with pytest.raises(CsvValueError): - reader = DataclassReader(f, UserWithoutAcceptWhiteSpacesDecorator) - list(reader) - - -def test_should_not_raise_error_when_value_is_whitespaces(create_csv): - csv_file = create_csv({"name": " "}) - - with csv_file.open("r") as f: - reader = DataclassReader(f, 
UserWithAcceptWhiteSpacesDecorator)
-        data = list(reader)
-
-    user = data[0]
-    assert user.name == " "
-
-
-def test_should_not_raise_error_when_using_meta_accept_whitespaces(create_csv):
-    csv_file = create_csv({"name": " "})
-
-    with csv_file.open("r") as f:
-        reader = DataclassReader(f, UserWithAcceptWhiteSpacesMetadata)
-        data = list(reader)
-
-    user = data[0]
-    assert user.name == " "

From 4ebbb715b79b7e5661904f92bb64879af0487a73 Mon Sep 17 00:00:00 2001
From: Nathan Richard 
Date: Fri, 28 Jun 2024 18:30:54 +0200
Subject: [PATCH 2/4] refactor: add description

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index ddbfb4e..97a3365 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 [tool.poetry]
 name = "pydantic-csv"
 version = "0.1.0"
-description = ""
+description = "convert CSV to pydantic.BaseModel and vice versa"
 authors = ["Nathan Richard "]
 license = "LICENSE"
 readme = "README.md"

From 706dbdd7c072f5e987f1f7be128a9d2c8aedf577 Mon Sep 17 00:00:00 2001
From: Nathan Richard 
Date: Fri, 28 Jun 2024 18:32:52 +0200
Subject: [PATCH 3/4] docs: remove unnecessary lines

---
 README.md | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/README.md b/README.md
index 6668234..a8248f5 100644
--- a/README.md
+++ b/README.md
@@ -3,20 +3,16 @@
 Pydantic CSV makes working with CSV files easier and much better than working with Dicts. It uses pydantic BaseModels to store data of every row on the CSV file and also uses type annotations which enables proper type checking and validation.
 
 ## Table of Contents
-
-___
 - [Main features](#main-features)
 - [Installation](#installation)
 - [Getting started](#getting-started)
 - [Using the BasemodelCSVReader](#using-the-basemodelcsvreader)
 - [Using the BasemodelCSVWriter](#using-the-basemodelcsvwriter)
 - [Error handling](#error-handling)
 - [Default values](#default-values)
 - [Mapping](#mapping)
 - [Contributing](#contributing)
 - [Copyright and License](#copyright-and-license)
 - [Credits](#credits)
 
-___
-
 ## Main features

From 3947e74960f3efdcc46e7af8bc23ad94e047e849 Mon Sep 17 00:00:00 2001
From: Nathan Richard 
Date: Fri, 5 Jul 2024 09:15:20 +0200
Subject: [PATCH 4/4] refactor: also support generators, tuples, etc.

---
 README.md                            |  4 ++--
 pydantic_csv/basemodel_csv_writer.py | 10 +++++++---
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 6668234..88aa6fb 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,7 @@ ___
 - Familiar syntax. The `BasemodelCSVReader` is used almost the same way as the `DictReader` in the standard library.
 - It uses `BaseModel` features that let you define Field properties or Config so the data can be parsed exactly the way you want.
 - Make the code cleaner. No more extra loops to convert data to the correct type, perform validation, set default values, the `BasemodelCSVReader` will do all this for you.
-- In addition to the `BasemodelCSVReader`, the library also provides a `BasemodelCSVWriter` which enables creating a CSV file using a list of instances of a BaseModel.
+- In addition to the `BasemodelCSVReader`, the library also provides a `BasemodelCSVWriter` which enables creating a CSV file from any iterable of BaseModel instances.
 - Because [sqlmodel](https://github.com/tiangolo/sqlmodel) uses pydantic.BaseModels too, you can directly fill a database with data from a CSV
@@ -298,7 +298,7 @@ class User(BaseModel):
     age: int
 ```
 
-And in your program we have a list of users:
+And in your program we have a list of users (any iterable of `User` instances works, e.g. a generator or tuple):
 
 ```python
 users = [
diff --git a/pydantic_csv/basemodel_csv_writer.py b/pydantic_csv/basemodel_csv_writer.py
index 0396893..d7210d5 100644
--- a/pydantic_csv/basemodel_csv_writer.py
+++ b/pydantic_csv/basemodel_csv_writer.py
@@ -3,6 +3,7 @@
 """
 
 import csv
+from collections.abc import Iterable
 from typing import Any
 
 import pydantic
@@ -19,7 +20,7 @@ class BasemodelCSVWriter:
     def __init__(
         self,
         file_obj: Any,
-        data: list[Any],
+        data: Iterable,
         model: type[BaseModel],
         *,
         use_alias: bool = True,
@@ -29,8 +30,11 @@ def __init__(
         if not file_obj:
             raise ValueError("The 'file_obj' argument is required")
 
-        if not isinstance(data, list):
-            raise ValueError("Invalid 'data' argument. It must be a list")
+        if not isinstance(data, Iterable) or isinstance(data, (str, BaseModel)):
+            raise ValueError(
+                "Invalid 'data' argument. It must be an Iterable that can hold multiple BaseModel "
+                "instances, e.g. a list, generator, or tuple"
+            )
 
         if model is None or not issubclass(model, pydantic.BaseModel):
             raise ValueError("Invalid 'model' argument. It must be a pydantic BaseModel")