diff --git a/.ci/release b/.ci/release new file mode 100755 index 0000000..6cff663 --- /dev/null +++ b/.ci/release @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 +''' +Run [[file:.ci/release][.ci/release]] to deploy the Python package onto [[https://pypi.org][PyPi]] and [[https://test.pypi.org][test PyPi]]. + +The script expects the =TWINE_PASSWORD= environment variable to contain the [[https://pypi.org/help/#apitoken][PyPi token]] (not the password!). + +The script can be run manually. +It also runs as the =pypi= job in [[file:.github/workflows/main.yml][Github Actions config]]. Packages are deployed on: +- every master commit, onto test pypi +- every new tag, onto production pypi + +You'll need to set =TWINE_PASSWORD= and =TWINE_PASSWORD_TEST= in [[https://help.github.com/en/actions/configuring-and-managing-workflows/creating-and-storing-encrypted-secrets#creating-encrypted-secrets][secrets]] +for Github Actions deployment to work. +''' + +import os +import sys +from pathlib import Path +from subprocess import check_call +import shutil + +is_ci = os.environ.get('CI') is not None + +def main() -> None: + import argparse + p = argparse.ArgumentParser() + p.add_argument('--test', action='store_true', help='use test pypi') + args = p.parse_args() + + extra = [] + if args.test: + extra.extend(['--repository', 'testpypi']) + + root = Path(__file__).absolute().parent.parent + os.chdir(root) # just in case + + if is_ci: + # see https://github.com/actions/checkout/issues/217 + check_call('git fetch --prune --unshallow'.split()) + + dist = root / 'dist' + if dist.exists(): + shutil.rmtree(dist) + + check_call(['python3', '-m', 'build']) + + TP = 'TWINE_PASSWORD' + password = os.environ.get(TP) + if password is None: + print(f"WARNING: no {TP} passed", file=sys.stderr) + import pip_secrets + password = pip_secrets.token_test if args.test else pip_secrets.token # meh + + check_call([ + 'python3', '-m', 'twine', + 'upload', *dist.iterdir(), + *extra, + ], env={ + 'TWINE_USERNAME': '__token__', + TP: password, + **os.environ, + }) + + +if __name__ == '__main__': + main() diff --git a/.ci/release-uv b/.ci/release-uv new file mode 100755 index 0000000..4da39b7 --- /dev/null +++ b/.ci/release-uv @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 +''' +Deploys the Python package onto [[https://pypi.org][PyPi]] or [[https://test.pypi.org][test PyPi]]. + +- running manually + + You'll need the =UV_PUBLISH_TOKEN= env variable + +- running on Github Actions + + Instead of an env variable, relies on configuring GitHub as a Trusted Publisher (https://docs.pypi.org/trusted-publishers/) -- both for test and regular pypi + + It runs as the =pypi= job in [[file:.github/workflows/main.yml][Github Actions config]].
+ Packages are deployed on: + - every master commit, onto test pypi + - every new tag, onto production pypi +''' + +UV_PUBLISH_TOKEN = 'UV_PUBLISH_TOKEN' + +import argparse +import os +import shutil +from pathlib import Path +from subprocess import check_call + +is_ci = os.environ.get('CI') is not None + + +def main() -> None: + p = argparse.ArgumentParser() + p.add_argument('--use-test-pypi', action='store_true') + args = p.parse_args() + + publish_url = ['--publish-url', 'https://test.pypi.org/legacy/'] if args.use_test_pypi else [] + + root = Path(__file__).absolute().parent.parent + os.chdir(root) # just in case + + # TODO ok, for now uv won't remove dist dir if it already exists + # https://github.com/astral-sh/uv/issues/10293 + dist = root / 'dist' + if dist.exists(): + shutil.rmtree(dist) + + check_call(['uv', 'build']) + + if not is_ci: + # CI relies on trusted publishers so doesn't need env variable + assert UV_PUBLISH_TOKEN in os.environ, f'no {UV_PUBLISH_TOKEN} passed' + + check_call(['uv', 'publish', *publish_url]) + + +if __name__ == '__main__': + main() diff --git a/.ci/run b/.ci/run new file mode 100755 index 0000000..c881818 --- /dev/null +++ b/.ci/run @@ -0,0 +1,36 @@ +#!/bin/bash +set -eu + +cd "$(dirname "$0")" +cd .. # git root + +if ! command -v sudo; then + # CI or Docker sometimes doesn't have it, so useful to have a dummy + function sudo { + "$@" + } +fi + +# --parallel-live to show outputs while it's running +tox_cmd='run-parallel --parallel-live' +if [ -n "${CI-}" ]; then + # install OS specific stuff here + case "$OSTYPE" in + darwin*) + # macos + : + ;; + cygwin* | msys* | win*) + # windows + # ugh. parallel stuff seems super flaky under windows, some random failures, "file used by other process" and crap like that + tox_cmd='run' + ;; + *) + # must be linux? + : + ;; + esac +fi + +# NOTE: expects uv installed +uv tool run --with tox-uv tox $tox_cmd "$@" diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..e51bcf6 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,114 @@ +# see https://github.com/karlicoss/pymplate for up-to-date reference + +name: CI +on: + push: + branches: '*' + tags: 'v[0-9]+.*' # only trigger on 'release' tags for PyPi + # Ideally I would put this in the pypi job... but github syntax doesn't allow for regexes there :shrug: + + # Needed to trigger on others' PRs. + # Note that people who fork it need to go to "Actions" tab on their fork and click "I understand my workflows, go ahead and enable them". + pull_request: + + # Needed to trigger workflows manually. 
+ workflow_dispatch: + inputs: + debug_enabled: + type: boolean + description: 'Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)' + required: false + default: false + + +jobs: + build: + strategy: + fail-fast: false + matrix: + platform: [ubuntu-latest, macos-latest, windows-latest] + python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', '3.14'] + # vvv just an example of excluding stuff from matrix + # exclude: [{platform: macos-latest, python-version: '3.6'}] + + runs-on: ${{ matrix.platform }} + + # useful for 'optional' pipelines + # continue-on-error: ${{ matrix.platform == 'windows-latest' }} + + steps: + # ugh https://github.com/actions/toolkit/blob/main/docs/commands.md#path-manipulation + - run: echo "$HOME/.local/bin" >> $GITHUB_PATH + + - uses: actions/checkout@v5 + with: + submodules: recursive + fetch-depth: 0 # nicer to have all git history when debugging/for tests + + - uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.python-version }} + + - uses: astral-sh/setup-uv@v7 + with: + enable-cache: false # we don't have lock files, so can't use them as cache key + + - uses: mxschmitt/action-tmate@v3 + if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }} + + # explicit bash command is necessary for Windows CI runner, otherwise it thinks it's cmd... + - run: bash .ci/run + env: + # only compute lxml coverage on ubuntu; it crashes on windows + CI_MYPY_COVERAGE: ${{ matrix.platform == 'ubuntu-latest' && '--cobertura-xml-report .coverage.mypy' || '' }} + + - if: matrix.platform == 'ubuntu-latest' # no need to compute coverage for other platforms + uses: codecov/codecov-action@v5 + with: + fail_ci_if_error: true # default false + token: ${{ secrets.CODECOV_TOKEN }} + flags: mypy-${{ matrix.python-version }} + files: .coverage.mypy/cobertura.xml + + + pypi: + # Do not run it for PRs/cron schedule etc. + # NOTE: release tags are guarded by on: push: tags on the top. + if: github.event_name == 'push' && (startsWith(github.event.ref, 'refs/tags/') || (github.event.ref == format('refs/heads/{0}', github.event.repository.master_branch))) + # Ugh, I tried using matrix or something to explicitly generate only test pypi or prod pypi pipelines. + # But github actions is so shit, it's impossible to do any logic at all, e.g. doesn't support conditional matrix, if/else statements for variables etc. 
+ + needs: [build] # add all other jobs here + + runs-on: ubuntu-latest + + permissions: + # necessary for Trusted Publishing + id-token: write + + steps: + # ugh https://github.com/actions/toolkit/blob/main/docs/commands.md#path-manipulation + - run: echo "$HOME/.local/bin" >> $GITHUB_PATH + + - uses: actions/checkout@v5 + with: + submodules: recursive + fetch-depth: 0 # pull all commits to correctly infer vcs version + + - uses: actions/setup-python@v6 + with: + python-version: '3.10' + + - uses: astral-sh/setup-uv@v7 + with: + enable-cache: false # we don't have lock files, so can't use them as cache key + + - name: 'release to test pypi' + # always deploy merged master to test pypi + if: github.event.ref == format('refs/heads/{0}', github.event.repository.master_branch) + run: .ci/release-uv --use-test-pypi + + - name: 'release to prod pypi' + # always deploy tags to release pypi + if: startsWith(github.event.ref, 'refs/tags/') + run: .ci/release-uv diff --git a/.gitignore b/.gitignore index 0fb9476..a06322a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,195 @@ -.tox + +# Created by https://www.toptal.com/developers/gitignore/api/python,emacs +# Edit at https://www.toptal.com/developers/gitignore?templates=python,emacs + +### Emacs ### +# -*- mode: gitignore; -*- +*~ +\#*\# +/.emacs.desktop +/.emacs.desktop.lock +*.elc +auto-save-list +tramp +.\#* + +# Org-mode +.org-id-locations +*_archive + +# flymake-mode +*_flymake.* + +# eshell files +/eshell/history +/eshell/lastdir + +# elpa packages +/elpa/ + +# reftex files +*.rel + +# AUCTeX auto folder +/auto/ + +# cask packages +.cask/ +dist/ + +# Flycheck +flycheck_*.el + +# server auth directory +/server/ + +# projectiles files +.projectile + +# directory configuration +.dir-locals.el + +# network security +/network-security.data + + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg MANIFEST -doc/build + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +pytestdebug.log + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ +doc/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +pythonenv* + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# profiling data +.prof + +# End of https://www.toptal.com/developers/gitignore/api/python,emacs diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 4854e53..0000000 --- a/.travis.yml +++ /dev/null @@ -1,5 +0,0 @@ -language: python -before_install: - pip install tox -script: - tox diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..fcdf7ba --- /dev/null +++ b/LICENSE @@ -0,0 +1,25 @@ +BSD 2-Clause License + +Copyright (c) 2012, Takafumi Arakaki +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/Makefile b/Makefile index b9ee208..d31c4ea 100644 --- a/Makefile +++ b/Makefile @@ -11,9 +11,4 @@ doc: cog ## Update files using cog.py cog: orgparse/__init__.py orgparse/__init__.py: README.rst - cd orgparse && cog.py -r __init__.py - - -## Upload to PyPI -upload: cog - python setup.py register sdist upload + cd src/orgparse && cog.py -r __init__.py diff --git a/README.rst b/README.rst index be978e2..e26fae6 100644 --- a/README.rst +++ b/README.rst @@ -1,35 +1,37 @@ =========================================================== - orgparse - Pyton module for reading Emacs org-mode file + orgparse - Python module for reading Emacs org-mode files =========================================================== -Links: - -* `Documentation (at Read the Docs) `_ -* `Repository (at GitHub) `_ -* `Issue tracker (at GitHub) `_ -* `PyPI `_ -* `Travis CI `_ |build-status| - -.. |build-status| - image:: https://secure.travis-ci.org/tkf/orgparse.png?branch=master - :target: http://travis-ci.org/tkf/orgparse - :alt: Build Status - +* `Documentation (Read the Docs) `_ +* `Repository (at GitHub) `_ +* `PyPI `_ +* `conda-forge `_ Install ------- -You can install `orgparse` from PyPI_:: +You can install ``orgparse`` via PyPI + +..
code-block:: console pip install orgparse +or via conda-forge + +.. code-block:: console + + conda install orgparse -c conda-forge + Usage ----- -Loading org object -^^^^^^^^^^^^^^^^^^ +There are pretty extensive doctests if you're interested in some specific method. Otherwise here are some example snippets: + + +Load org node +^^^^^^^^^^^^^ :: from orgparse import load, loads @@ -72,8 +74,8 @@ Traverse org tree * Heading 1 -Accessing to node attributes -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Accessing node attributes +^^^^^^^^^^^^^^^^^^^^^^^^^ >>> root = loads(''' ... * DONE Heading :TAG: @@ -105,3 +107,13 @@ True 'some text' >>> node.body ' Body texts...' + + +Project status +-------------- + +Project is maintained by @karlicoss (myself). + +For my personal use, orgparse mostly has all features I need, so there hasn't been much active development lately. + +However, contributions are always welcome! Please provide tests along with your contribution if you're fixing bugs or adding new functionality. diff --git a/conftest.py b/conftest.py new file mode 100644 index 0000000..627def8 --- /dev/null +++ b/conftest.py @@ -0,0 +1,58 @@ +# this is a hack to monkey patch pytest so it handles tests inside namespace packages without __init__.py properly +# without it, pytest can't discover the package root for some reason +# also see https://github.com/karlicoss/pytest_namespace_pkgs for more + +import os +import pathlib +from typing import Optional + +import _pytest.main +import _pytest.pathlib + +# we consider all dirs in repo/ to be namespace packages +root_dir = pathlib.Path(__file__).absolute().parent.resolve() / 'src' +assert root_dir.exists(), root_dir + +# TODO assert it contains package name?? maybe get it via setuptools.. + +namespace_pkg_dirs = [str(d) for d in root_dir.iterdir() if d.is_dir()] + +# resolve_package_path is called from _pytest.pathlib.import_path +# takes a full abs path to the test file and needs to return the path to the 'root' package on the filesystem +resolve_pkg_path_orig = _pytest.pathlib.resolve_package_path + + +def resolve_package_path(path: pathlib.Path) -> Optional[pathlib.Path]: + result = path # search from the test file upwards + for parent in result.parents: + if str(parent) in namespace_pkg_dirs: + return parent + if os.name == 'nt': + # ??? for some reason on windows it is trying to call this against conftest? but not on linux/osx + if path.name == 'conftest.py': + return resolve_pkg_path_orig(path) + raise RuntimeError("Couldn't determine path for ", path) + + +# NOTE: seems like it's not necessary anymore? +# keeping it for now just in case +# after https://github.com/pytest-dev/pytest/pull/13426 we should be able to remove the whole conftest +# _pytest.pathlib.resolve_package_path = resolve_package_path + + +# without patching, the orig function returns just a package name for some reason +# (I think it's used as a sort of fallback) +# so we need to point it at the absolute path properly +# not sure what are the consequences.. maybe it wouldn't be able to run against installed packages? not sure.. 
+search_pypath_orig = _pytest.main.search_pypath + + +def search_pypath(module_name: str) -> str: + mpath = root_dir / module_name.replace('.', os.sep) + if not mpath.is_dir(): + mpath = mpath.with_suffix('.py') + assert mpath.exists(), mpath # just in case + return str(mpath) + + +_pytest.main.search_pypath = search_pypath # ty: ignore[invalid-assignment] diff --git a/doc/source/conf.py b/doc/source/conf.py index ed2440c..1e451e7 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -1,8 +1,7 @@ -# -*- coding: utf-8 -*- - -from os.path import dirname import sys -sys.path.insert(0, dirname(dirname(dirname(__file__)))) +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent.parent)) # -- General configuration ------------------------------------------------ extensions = [ @@ -15,14 +14,18 @@ source_suffix = '.rst' master_doc = 'index' +# TODO not sure I'm doing that right.. +import orgparse + # General information about the project. -project = u'orgparse' -copyright = u'2012, Takafumi Arakaki' +project = 'orgparse' +copyright = '2012, Takafumi Arakaki' # noqa: A001 # The short X.Y version. -version = '0.0.1' +# TODO use setup.py for version +version = orgparse.__version__ # ty: ignore[unresolved-attribute] # The full version, including alpha/beta/rc tags. -release = '0.0.1.dev3' +release = orgparse.__version__ # ty: ignore[unresolved-attribute] exclude_patterns = [] @@ -39,22 +42,19 @@ # -- Options for LaTeX output --------------------------------------------- latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', - -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', - -# Additional stuff for the LaTeX preamble. -#'preamble': '', + # The paper size ('letterpaper' or 'a4paper'). + #'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + #'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + #'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto/manual]). latex_documents = [ - ('index', 'orgparse.tex', u'orgparse Documentation', - u'Takafumi Arakaki', 'manual'), + ('index', 'orgparse.tex', 'orgparse Documentation', 'Takafumi Arakaki', 'manual'), ] @@ -62,12 +62,11 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ - ('index', 'orgparse', u'orgparse Documentation', - [u'Takafumi Arakaki'], 1) + ('index', 'orgparse', 'orgparse Documentation', ['Takafumi Arakaki'], 1), ] # If true, show URL addresses after external links. 
-#man_show_urls = False +# man_show_urls = False # -- Options for Texinfo output ------------------------------------------- @@ -75,9 +74,15 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - ('index', 'orgparse', u'orgparse Documentation', - u'Takafumi Arakaki', 'orgparse', 'One line description of project.', - 'Miscellaneous'), + ( + 'index', + 'orgparse', + 'orgparse Documentation', + 'Takafumi Arakaki', + 'orgparse', + 'One line description of project.', + 'Miscellaneous', + ), ] @@ -89,4 +94,4 @@ autodoc_member_order = 'bysource' autodoc_default_flags = ['members'] -inheritance_graph_attrs = dict(rankdir="TB") +inheritance_graph_attrs = {'rankdir': "TB"} diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 0000000..7b1e535 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,18 @@ +[mypy] +pretty = True +show_error_context = True +show_column_numbers = True +show_error_end = True + +check_untyped_defs = True + +# see https://mypy.readthedocs.io/en/stable/error_code_list2.html +warn_redundant_casts = True +strict_equality = True +warn_unused_ignores = True +enable_error_code = deprecated,redundant-expr,possibly-undefined,truthy-bool,truthy-iterable,ignore-without-code,unused-awaitable + + +# an example of suppressing +# [mypy-my.config.repos.pdfannots.pdfannots] +# ignore_errors = True diff --git a/orgparse/tests/data/00_simple.py b/orgparse/tests/data/00_simple.py deleted file mode 100644 index 8f7da41..0000000 --- a/orgparse/tests/data/00_simple.py +++ /dev/null @@ -1,30 +0,0 @@ -def nodedict(i, level, todo=None, shallow_tags=set([]), tags=set([])): - return dict( - heading="Heading {0}".format(i), - level=level, - todo=todo, - shallow_tags=shallow_tags, - tags=tags, - ) - - -def tags(nums): - return set(map('TAG{0}'.format, nums)) - - -data = [ - nodedict(i, *vals) for (i, vals) in enumerate([ - [1, 'TODO1', tags([1]), tags(range(1, 2))], - [2, 'TODO2', tags([2]), tags(range(1, 3))], - [3, 'TODO3', tags([3]), tags(range(1, 4))], - [4, 'TODO4', tags([4]), tags(range(1, 5))], - [2, None, tags([]), tags([1])], - [2, None, tags([]), tags([1])], - [1, None, tags([2]), tags([2])], - [2, None, tags([2]), tags([2])], - [3, None, tags([]), tags([2])], - [5, None, tags([3, 4]), tags([2, 3, 4])], - [4, None, tags([1]), tags([1, 2])], - [2, None, tags([]), tags([2])], - [1], - ])] diff --git a/orgparse/tests/data/01_attributes.py b/orgparse/tests/data/01_attributes.py deleted file mode 100644 index 498766c..0000000 --- a/orgparse/tests/data/01_attributes.py +++ /dev/null @@ -1,41 +0,0 @@ -from orgparse.date import ( - OrgDate, OrgDateScheduled, OrgDateDeadline, OrgDateClosed, - OrgDateClock, -) - -node1 = dict( - heading="A node with a lot of attributes", - priority='A', - scheduled=OrgDateScheduled((2010, 8, 6)), - deadline=OrgDateDeadline((2010, 8, 10)), - closed=OrgDateClosed((2010, 8, 8, 18, 0)), - clock=[ - OrgDateClock((2010, 8, 8, 17, 40), (2010, 8, 8, 17, 50), 10), - OrgDateClock((2010, 8, 8, 17, 00), (2010, 8, 8, 17, 30), 30), - ], - properties=dict(Effort=70), - datelist=[OrgDate((2010, 8, 16))], - rangelist=[ - OrgDate((2010, 8, 7), (2010, 8, 8)), - OrgDate((2010, 8, 9, 0, 30), (2010, 8, 10, 13, 20)), - ], - body="""\ - - <2010-08-16 Mon> DateList - - <2010-08-07 Sat>--<2010-08-08 Sun> - - <2010-08-09 Mon 00:30>--<2010-08-10 Tue 13:20> RangeList""" -) - -node2 = dict( - heading="A node without any attributed", - priority=None, - scheduled=OrgDate(None), - deadline=OrgDate(None), - closed=OrgDate(None), - clock=[], - 
properties={}, - datelist=[], - rangelist=[], - body="", -) - -data = [node1, node2, node1] diff --git a/orgparse/tests/test_data.py b/orgparse/tests/test_data.py deleted file mode 100644 index 857d5d4..0000000 --- a/orgparse/tests/test_data.py +++ /dev/null @@ -1,82 +0,0 @@ -import os -from glob import glob -import pickle -from nose.tools import eq_ - -from .. import load -from ..utils.py3compat import execfile - - -DATADIR = os.path.join(os.path.dirname(__file__), 'data') - - -def load_data(path): - """Load data from python file""" - ns = {} - execfile(path, ns) - return ns['data'] - - -def value_from_data_key(node, key): - """ - Helper function for check_data. Get value from Orgnode by key. - """ - if key == 'tags_inher': - return node.tags - elif key == 'children_heading': - return [c.heading for c in node.children] - elif key in ('parent_heading', - 'previous_same_level_heading', - 'next_same_level_heading', - ): - othernode = getattr(node, key.rsplit('_', 1)[0]) - if othernode and not othernode.is_root(): - return othernode.heading - else: - return - else: - return getattr(node, key) - - -def data_path(dataname, ext): - return os.path.join(DATADIR, '{0}.{1}'.format(dataname, ext)) - - -def get_datanames(): - for oname in sorted(glob(os.path.join(DATADIR, '*.org'))): - yield os.path.splitext(os.path.basename(oname))[0] - - -def check_data(dataname): - """ - Compare parsed data from 'data/*.org' and its correct answer 'data/*.py' - """ - oname = data_path(dataname, "org") - data = load_data(data_path(dataname, "py")) - root = load(oname) - - for (i, (node, kwds)) in enumerate(zip(root[1:], data)): - for key in kwds: - val = value_from_data_key(node, key) - eq_(kwds[key], val, - msg=('check value of {0}-th node of key "{1}" from "{2}".' - '\n\nParsed:\n{3}\n\nReal:\n{4}' - ).format(i, key, dataname, val, kwds[key])) - - eq_(root.env.filename, oname) - - -def test_data(): - for dataname in get_datanames(): - yield (check_data, dataname) - - -def check_picklable(dataname): - oname = data_path(dataname, "org") - root = load(oname) - pickle.dumps(root) - - -def test_picklable(): - for dataname in get_datanames(): - yield (check_picklable, dataname) diff --git a/orgparse/tests/test_hugedata.py b/orgparse/tests/test_hugedata.py deleted file mode 100644 index b72e27f..0000000 --- a/orgparse/tests/test_hugedata.py +++ /dev/null @@ -1,36 +0,0 @@ -try: - import cPickle as pickle -except ImportError: - import pickle - -from nose.tools import eq_ - -from .. 
import loadi - - -def generate_org_lines(num_top_nodes, depth=3, nodes_per_level=1, _level=1): - if depth == 0: - return - for i in range(num_top_nodes): - yield ("*" * _level) + ' {0}-th heading of level {1}'.format(i, _level) - for child in generate_org_lines( - nodes_per_level, depth - 1, nodes_per_level, _level + 1): - yield child - - -def num_generate_org_lines(num_top_nodes, depth=3, nodes_per_level=1): - if depth == 0: - return 0 - return num_top_nodes * ( - 1 + num_generate_org_lines( - nodes_per_level, depth - 1, nodes_per_level)) - - -def test_picklable(): - num = 1000 - depth = 3 - nodes_per_level = 1 - root = loadi(generate_org_lines(num, depth, nodes_per_level)) - eq_(sum(1 for _ in root), - num_generate_org_lines(num, depth, nodes_per_level) + 1) - pickle.dumps(root) # should not fail diff --git a/orgparse/utils/_py3compat.py b/orgparse/utils/_py3compat.py deleted file mode 100644 index 820e310..0000000 --- a/orgparse/utils/_py3compat.py +++ /dev/null @@ -1,9 +0,0 @@ -""" -Python 3 compatibility code which is loaded only when from Python 3. -""" - - -def execfile(filename, *args): - return exec( - compile(open(filename).read(), filename, 'exec'), - *args) diff --git a/orgparse/utils/py3compat.py b/orgparse/utils/py3compat.py deleted file mode 100644 index 6a9059e..0000000 --- a/orgparse/utils/py3compat.py +++ /dev/null @@ -1,18 +0,0 @@ -import sys - -PY3 = (sys.version_info[0] >= 3) - -try: - # Python 2 - unicode = unicode - basestring = basestring -except NameError: - # Python 3 - basestring = unicode = str - -PY3 = (sys.version_info[0] >= 3) - -if PY3: - from ._py3compat import execfile -else: - execfile = execfile diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..0aa9ba0 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,53 @@ +[project] +dynamic = ["version"] # version is managed by build backend +name = "orgparse" +dependencies = [ +] +requires-python = ">=3.9" +description = "orgparse - Emacs org-mode parser in Python" +license = {file = "LICENSE"} +authors = [ + {name = "Takafumi Arakaki (@tkf)", email = "aka.tkf@gmail.com"}, + {name = "Dmitrii Gerasimov (@karlicoss)", email = "karlicoss@gmail.com"}, +] +maintainers = [ + {name = "Dmitrii Gerasimov (@karlicoss)", email = "karlicoss@gmail.com"}, +] +keywords = ["org", "org-mode", "emacs"] +# see: http://pypi.python.org/pypi?%3Aaction=list_classifiers +classifiers = [ + "Development Status :: 5 - Production/Stable", + "License :: OSI Approved :: BSD License", + "Topic :: Text Processing :: Markup", +] +# TODO add it back later, perhaps via ast? +# long_description=orgparse.__doc__, + +[project.urls] +Homepage = "https://github.com/karlicoss/orgparse" + +[project.optional-dependencies] +[dependency-groups] +testing = [ + "pytest", + "ruff", + "mypy", + "lxml", # for mypy html coverage + "ty>=0.0.1a25", +] + + +[build-system] +requires = ["hatchling", "hatch-vcs"] +build-backend = "hatchling.build" + +# unfortunately have to duplicate project name here atm, see https://github.com/pypa/hatch/issues/1894 +[tool.hatch.build.targets.wheel] +packages = ["src/orgparse"] + +[tool.hatch.version] +source = "vcs" + +[tool.hatch.version.raw-options] +version_scheme = "python-simplified-semver" +local_scheme = "dirty-tag" diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..226488b --- /dev/null +++ b/pytest.ini @@ -0,0 +1,20 @@ +[pytest] +# discover files that don't follow test_ naming. 
Useful to keep tests along with the source code +python_files = *.py + +# this setting only impacts package/module naming under pytest, not the discovery +consider_namespace_packages = true + +addopts = + # prevent pytest cache from being created... it craps into project dir and I never use it anyway + -p no:cacheprovider + + # -rap to print tests summary even when they are successful + -rap + --verbose + + # otherwise it won't discover doctests + --doctest-modules + + # show all test durations (unless they are too short) + --durations=0 diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 0000000..e05c3b4 --- /dev/null +++ b/ruff.toml @@ -0,0 +1,102 @@ +line-length = 120 # impacts import sorting + +lint.extend-select = [ + "ALL", +] + +# Preserve types, even if a file imports `from __future__ import annotations` +# we need this for cachew to work with HPI types on 3.9 +# can probably remove after 3.10? +lint.pyupgrade.keep-runtime-typing = true + +lint.ignore = [ + "D", # annoying nags about docstrings + "N", # pep naming + "TCH", # type checking rules, mostly just suggests moving imports under TYPE_CHECKING + "S", # bandit (security checks) -- tends to be not very useful, lots of nitpicks + "DTZ", # datetimes checks -- complaining about missing tz and mostly false positives + "FIX", # complains about fixmes/todos -- annoying + "TD", # complains about todo formatting -- too annoying + "ANN", # missing type annotations? seems way to strict though + "EM" , # suggests assigning all exception messages into a variable first... pretty annoying + +### too opinionated style checks + "E501", # too long lines + "E731", # assigning lambda instead of using def + "E741", # Ambiguous variable name: `l` + "E742", # Ambiguous class name: `O + "E401", # Multiple imports on one line + "F403", # import *` used; unable to detect undefined names +### + +### + "E722", # Do not use bare `except` ## Sometimes it's useful for defensive imports and that sort of thing.. + "F811", # Redefinition of unused # this gets in the way of pytest fixtures (e.g. in cachew) + +## might be nice .. but later and I don't wanna make it strict + "E402", # Module level import not at top of file + +### these are just nitpicky, we usually know better + "PLR0911", # too many return statements + "PLR0912", # too many branches + "PLR0913", # too many function arguments + "PLR0915", # too many statements + "PLR1714", # consider merging multiple comparisons + "PLR2044", # line with empty comment + "PLR5501", # use elif instead of else if + "PLR2004", # magic value in comparison -- super annoying in tests +### + "PLR0402", # import X.Y as Y -- TODO maybe consider enabling it, but double check + + "B009", # calling gettattr with constant attribute -- this is useful to convince mypy + "B010", # same as above, but setattr + "B017", # pytest.raises(Exception) + "B023", # seems to result in false positives? + + # complains about useless pass, but has sort of a false positive if the function has a docstring? + # this is common for click entrypoints (e.g. 
in __main__), so disable + "PIE790", + + # a bit too annoying, offers to convert for loops to list comprehension + # , which may hurt readability + "PERF401", + + # suggests not using exceptions in for loops + # we do use this technique a lot, plus in 3.11 happy path exception handling is "zero-cost" + "PERF203", + + "RET504", # unnecessary assignment before returning -- that can be useful for readability + "RET505", # unnecessary else after return -- can hurt readability + + "PLW0603", # global variable update.. we usually know why we are doing this + "PLW2901", # for loop variable overwritten, usually this is intentional + + "PT011", # pytest raises is too broad + + "COM812", # trailing comma missing -- mostly just being annoying with long multiline strings + + "TRY003", # suggests defining exception messages in exception class -- kinda annoying + "TRY201", # raise without specifying exception name -- sometimes hurts readability + "TRY400", # a bit dumb, and results in false positives (see https://github.com/astral-sh/ruff/issues/18070) + "TRY401", # redundant exception in logging.exception call? TODO double check, might result in excessive logging + + "TID252", # Prefer absolute imports over relative imports from parent modules + + ## too annoying + "T20", # just complains about prints and pprints (TODO maybe consider later?) + "Q", # flake quotes, too annoying + "C90", # some complexity checking + "G004", # logging statement uses f string + "ERA001", # commented out code + "SLF001", # private member accessed + "BLE001", # do not catch 'blind' Exception + "INP001", # complains about implicit namespace packages + "SIM102", # if statements collapsing, often hurts readability + "SIM103", # multiple conditions collapsing, often hurts readability + "SIM105", # suggests using contextlib.suppress instead of try/except -- this wouldn't be mypy friendly + "SIM108", # suggests using ternary operation instead of if -- hurts readability + "SIM110", # suggests using any(...)
instead of for loop/return -- hurts readability + "SIM117", # suggests using single with statement instead of nested -- doesn't work in tests + "RSE102", # complains about missing parens in exceptions + ## +] diff --git a/setup.py b/setup.py deleted file mode 100644 index dfcf8ad..0000000 --- a/setup.py +++ /dev/null @@ -1,35 +0,0 @@ -from distutils.core import setup - -import orgparse - -setup( - name='orgparse', - version=orgparse.__version__, - packages=[ - 'orgparse', - 'orgparse.utils', - 'orgparse.tests', - 'orgparse.tests.data', - ], - package_data={ - 'orgparse.tests.data': ['*.org'], - }, - author=orgparse.__author__, - author_email='aka.tkf@gmail.com', - url='https://github.com/tkf/orgparse', - license=orgparse.__license__, - description='orgparse - Emacs org-mode parser in Python', - long_description=orgparse.__doc__, - keywords='org-mode, Emacs, parser', - classifiers=[ - "Development Status :: 3 - Alpha", - 'License :: OSI Approved :: BSD License', - 'Programming Language :: Python', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.6', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.2', - # see: http://pypi.python.org/pypi?%3Aaction=list_classifiers - ], -) diff --git a/orgparse/__init__.py b/src/orgparse/__init__.py similarity index 55% rename from orgparse/__init__.py rename to src/orgparse/__init__.py index cb3741d..110c474 100644 --- a/orgparse/__init__.py +++ b/src/orgparse/__init__.py @@ -1,41 +1,32 @@ # Import README.rst using cog # [[[cog # from cog import out -# out('"""\n{0}\n"""'.format(file('../README.rst').read())) +# out('"""\n{0}\n"""'.format(open('../README.rst').read())) # ]]] """ =========================================================== - orgparse - Pyton module for reading Emacs org-mode file + orgparse - Python module for reading Emacs org-mode files =========================================================== -Links: - -* `Documentation (at Read the Docs) `_ -* `Repository (at GitHub) `_ -* `Issue tracker (at GitHub) `_ -* `PyPI `_ -* `Travis CI `_ |build-status| - -.. |build-status| - image:: https://secure.travis-ci.org/tkf/orgparse.png?branch=master - :target: http://travis-ci.org/tkf/orgparse - :alt: Build Status - +* `Documentation (Read the Docs) `_ +* `Repository (at GitHub) `_ +* `PyPI `_ Install ------- -You can install `orgparse` from PyPI_:: - pip install orgparse Usage ----- -Loading org object -^^^^^^^^^^^^^^^^^^ +There are pretty extensive doctests if you're interested in some specific method. Otherwise here are some example snippets: + + +Load org node +^^^^^^^^^^^^^ :: from orgparse import load, loads @@ -78,8 +69,8 @@ * Heading 1 -Accessing to node attributes -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Accessing node attributes +^^^^^^^^^^^^^^^^^^^^^^^^^ >>> root = loads(''' ... * DONE Heading :TAG: @@ -115,52 +106,60 @@ """ # [[[end]]] -import codecs +from collections.abc import Iterable +from pathlib import Path +from typing import Optional, TextIO, Union -from .node import parse_lines -from .utils.py3compat import basestring +from .node import OrgEnv, OrgNode, parse_lines # todo basenode?? -__version__ = '0.0.1.dev3' -__author__ = 'Takafumi Arakaki' -__license__ = 'BSD License' -__all__ = ["load", "loads", "loadi"] +__all__ = ["load", "loadi", "loads"] -def load(path): +def load(path: Union[str, Path, TextIO], env: Optional[OrgEnv] = None) -> OrgNode: """ Load org-mode document from a file.
:type path: str or file-like - :arg path: Path to org file or file-like object of a org document. + :arg path: Path to org file or file-like object of an org document. :rtype: :class:`orgparse.node.OrgRootNode` """ - if isinstance(path, basestring): - orgfile = codecs.open(path, encoding='utf8') - filename = path - else: - orgfile = path - filename = path.name if hasattr(path, 'name') else '' - return loadi((l.rstrip('\n') for l in orgfile.readlines()), - filename=filename) + # Make sure it is a Path object. + if isinstance(path, str): + path = Path(path) + + # if it is a Path + if isinstance(path, Path): + # open that Path + with path.open('r', encoding='utf8') as orgfile: + # try again loading + return load(orgfile, env) + + # We assume it is a file-like object (e.g. io.StringIO) + all_lines = (line.rstrip('\n') for line in path) + + # get the filename + filename = path.name if hasattr(path, 'name') else '' + + return loadi(all_lines, filename=filename, env=env) -def loads(string, filename=''): +def loads(string: str, filename: str = '', env: Optional[OrgEnv] = None) -> OrgNode: """ Load org-mode document from a string. :rtype: :class:`orgparse.node.OrgRootNode` """ - return loadi(string.splitlines(), filename=filename) + return loadi(string.splitlines(), filename=filename, env=env) -def loadi(lines, filename=''): +def loadi(lines: Iterable[str], filename: str = '', env: Optional[OrgEnv] = None) -> OrgNode: """ Load org-mode document from an iterative object. :rtype: :class:`orgparse.node.OrgRootNode` """ - return parse_lines(lines, filename=filename) + return parse_lines(lines, filename=filename, env=env) diff --git a/orgparse/date.py b/src/orgparse/date.py similarity index 54% rename from orgparse/date.py rename to src/orgparse/date.py index 2600b19..1685f32 100644 --- a/orgparse/date.py +++ b/src/orgparse/date.py @@ -1,21 +1,26 @@ +from __future__ import annotations + import datetime import re +from datetime import timedelta +from typing import Optional, Union + +DateIsh = Union[datetime.date, datetime.datetime] -def total_seconds(td): +def total_seconds(td: timedelta) -> float: """Equivalent to `datetime.timedelta.total_seconds`.""" - return float(td.microseconds + - (td.seconds + td.days * 24 * 3600) * 10 ** 6) / 10 ** 6 + return float(td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6 -def total_minutes(td): +def total_minutes(td: timedelta) -> float: """Alias for ``total_seconds(td) / 60``.""" return total_seconds(td) / 60 -def gene_timestamp_regex(brtype, prefix=None, nocookie=False): +def gene_timestamp_regex(brtype: str, prefix: str | None = None, *, nocookie: bool = False) -> str: """ - Generate timetamp regex for active/inactive/nobrace brace type + Generate timestamp regex for active/inactive/nobrace brace type :type brtype: {'active', 'inactive', 'nobrace'} :arg brtype: @@ -44,12 +49,15 @@ def gene_timestamp_regex(brtype, prefix=None, nocookie=False): >>> '{year}-{month}-{day}'.format(**m.groupdict()) '2010-06-21' >>> m = timestamp_re.match('<2005-10-01 Sat 12:30 +7m -3d>') - >>> m.groupdict() == { - ... 'year': '2005', 'month': '10', 'day': '01', - ... 'hour': '12', 'min': '30', - ... 'repeatpre': '+', 'repeatnum': '7', 'repeatdwmy': 'm', - ... 'warnpre': '-', 'warnnum': '3', 'warndwmy': 'd'} - True + >>> from collections import OrderedDict + >>> sorted(m.groupdict().items()) + ... 
# doctest: +NORMALIZE_WHITESPACE + [('day', '01'), + ('end_hour', None), ('end_min', None), + ('hour', '12'), ('min', '30'), + ('month', '10'), + ('repeatdwmy', 'm'), ('repeatnum', '7'), ('repeatpre', '+'), + ('warndwmy', 'd'), ('warnnum', '3'), ('warnpre', '-'), ('year', '2005')] When ``brtype = 'nobrace'``, cookie part cannot be retrieved. @@ -63,11 +71,12 @@ def gene_timestamp_regex(brtype, prefix=None, nocookie=False): >>> '{year}-{month}-{day}'.format(**m.groupdict()) '2010-06-21' >>> m = timestamp_re.match('2005-10-01 Sat 12:30 +7m -3d') - >>> m.groupdict() == { - ... 'year': '2005', 'month': '10', 'day': '01', - ... 'hour': '12', 'min': '30'} - True - + >>> sorted(m.groupdict().items()) + ... # doctest: +NORMALIZE_WHITESPACE + [('day', '01'), + ('end_hour', None), ('end_min', None), + ('hour', '12'), ('min', '30'), + ('month', '10'), ('year', '2005')] """ if brtype == 'active': @@ -77,15 +86,15 @@ def gene_timestamp_regex(brtype, prefix=None, nocookie=False): elif brtype == 'nobrace': (bo, bc) = ('', '') else: - raise ValueError("brtype='{0!r}' is invalid".format(brtype)) + raise ValueError(f"brtype='{brtype!r}' is invalid") if brtype == 'nobrace': ignore = r'[\s\w]' else: - ignore = '[^{bc}]'.format(bc=bc) + ignore = f'[^{bc}]' if prefix is None: - prefix = '{0}_'.format(brtype) + prefix = f'{brtype}_' regex_date_time = r""" (?P<{prefix}year>\d{{4}}) - @@ -95,6 +104,11 @@ def gene_timestamp_regex(brtype, prefix=None, nocookie=False): ({ignore}+?) (?P<{prefix}hour>\d{{2}}) : (?P<{prefix}min>\d{{2}}) + ( # optional end time range + --? + (?P<{prefix}end_hour>\d{{2}}) : + (?P<{prefix}end_min>\d{{2}}) + )? )? """ regex_cookie = r""" @@ -102,37 +116,65 @@ def gene_timestamp_regex(brtype, prefix=None, nocookie=False): ({ignore}+?) (?P<{prefix}repeatpre> [\.\+]{{1,2}}) (?P<{prefix}repeatnum> \d+) - (?P<{prefix}repeatdwmy> [dwmy]) + (?P<{prefix}repeatdwmy> [hdwmy]) )? ( # optional warning ({ignore}+?) (?P<{prefix}warnpre> \-) (?P<{prefix}warnnum> \d+) - (?P<{prefix}warndwmy> [dwmy]) + (?P<{prefix}warndwmy> [hdwmy]) )? """ - # http://www.pythonregex.com/ regex = ''.join([ bo, regex_date_time, regex_cookie if nocookie or brtype != 'nobrace' else '', '({ignore}*?)', - bc]) + bc, + ]) # fmt: skip return regex.format(prefix=prefix, ignore=ignore) +def date_time_format(date: DateIsh) -> str: + """ + Format a date or datetime in default org format + + @param date The date + + @return Formatted date(time) + """ + default_format_date = "%Y-%m-%d %a" + default_format_datetime = "%Y-%m-%d %a %H:%M" + is_datetime = isinstance(date, datetime.datetime) + + return date.strftime(default_format_datetime if is_datetime else default_format_date) + + +def is_same_day(date0, date1) -> bool: + """ + Check if two dates or datetimes are on the same day + """ + return OrgDate._date_to_tuple(date0)[:3] == OrgDate._date_to_tuple(date1)[:3] + + TIMESTAMP_NOBRACE_RE = re.compile( gene_timestamp_regex('nobrace', prefix=''), - re.VERBOSE) + re.VERBOSE, +) TIMESTAMP_RE = re.compile( - '|'.join((gene_timestamp_regex('active'), - gene_timestamp_regex('inactive'))), - re.VERBOSE) + '|'.join(( + gene_timestamp_regex('active'), + gene_timestamp_regex('inactive'), + )), + re.VERBOSE, +) # fmt: skip + +_Repeater = tuple[str, int, str] -class OrgDate(object): +class OrgDate: _active_default = True """ The default active value. 
@@ -142,7 +184,22 @@ class OrgDate(object): """ - def __init__(self, start, end=None, active=None): + """ + When formatting the date to string via __str__, and there is an end date on + the same day as the start date, allow formatting in the short syntax + <2021-09-03 Fri 16:01--17:30>? Otherwise the string representation would be + <2021-09-03 Fri 16:01>--<2021-09-03 Fri 17:30> + """ + _allow_short_range = True + + def __init__( + self, + start, + end=None, + active: bool | None = None, # noqa: FBT001 + repeater: _Repeater | None = None, + warning: _Repeater | None = None, + ) -> None: """ Create :class:`OrgDate` object @@ -155,6 +212,10 @@ def __init__(self, start, end=None, active=None): :arg active: Active/inactive flag. None means using its default value, which may be different for different subclasses. + :type repeater: tuple or None + :arg repeater: Repeater interval. + :type warning: tuple or None + :arg warning: Deadline warning interval. >>> OrgDate(datetime.date(2012, 2, 10)) OrgDate((2012, 2, 10)) @@ -180,9 +241,12 @@ def __init__(self, start, end=None, active=None): self._start = self._to_date(start) self._end = self._to_date(end) self._active = self._active_default if active is None else active + # repeater and warning are tuples of (prefix, number, interval) + self._repeater = repeater + self._warning = warning @staticmethod - def _to_date(date): + def _to_date(date) -> DateIsh: if isinstance(date, (tuple, list)): if len(date) == 3: return datetime.date(*date) @@ -192,51 +256,79 @@ def _to_date(date): raise ValueError( "Automatic conversion to the datetime object " "requires at least 3 elements in the tuple. " - "Only {0} elements are in the given tuple '{1}'." - .format(len(date), date)) + f"Only {len(date)} elements are in the given tuple '{date}'."
+ ) elif isinstance(date, (int, float)): return datetime.datetime.fromtimestamp(date) else: return date @staticmethod - def _date_to_tuple(date): + def _date_to_tuple(date: DateIsh) -> tuple[int, ...]: if isinstance(date, datetime.datetime): return tuple(date.timetuple()[:6]) elif isinstance(date, datetime.date): return tuple(date.timetuple()[:3]) + else: + raise TypeError(f"can't happen: {date} {type(date)}") - def __repr__(self): + def __repr__(self) -> str: args = [ self.__class__.__name__, self._date_to_tuple(self.start), self._date_to_tuple(self.end) if self.has_end() else None, None if self._active is self._active_default else self._active, + self._repeater, + self._warning, ] - if args[2] is None and args[3] is None: - return '{0}({1!r})'.format(*args) - elif args[3] is None: - return '{0}({1!r}, {2!r})'.format(*args) - else: - return '{0}({1!r}, {2!r}, {3!r})'.format(*args) + while args[-1] is None: + args.pop() + if len(args) > 3 and args[3] is None: + args[3] = self._active_default + return '{}({})'.format(args[0], ', '.join(map(repr, args[1:]))) + + def __str__(self) -> str: + fence = ("<", ">") if self.is_active() else ("[", "]") + + start = date_time_format(self.start) + end = None + + if self.has_end(): + if self._allow_short_range and is_same_day(self.start, self.end): + start += "--{}".format(self.end.strftime("%H:%M")) + else: + end = date_time_format(self.end) + + if self._repeater is not None: + (x, y, z) = self._repeater + start += f" {x}{y}{z}" + if self._warning is not None: + (x, y, z) = self._warning + start += f" {x}{y}{z}" + ret = f"{fence[0]}{start}{fence[1]}" + if end: + ret += f"--{fence[0]}{end}{fence[1]}" - def __nonzero__(self): + return ret + + def __bool__(self) -> bool: return bool(self._start) - __bool__ = __nonzero__ # PY3 + def __hash__(self) -> int: + return hash((self._start, self._end, self._active, self._repeater, self._warning)) - def __eq__(self, other): - if (isinstance(other, OrgDate) and - self._start is None and - other._start is None): + def __eq__(self, other) -> bool: + if isinstance(other, OrgDate) and self._start is None and other._start is None: return True - return (isinstance(other, self.__class__) and - self._start == other._start and - self._end == other._end and - self._active == other._active) + return ( + isinstance(other, self.__class__) + and self._start == other._start + and self._end == other._end + and self._active == other._active + ) @property - def start(self): + def start(self) -> DateIsh: """ Get date or datetime object @@ -249,7 +341,7 @@ def start(self): return self._start @property - def end(self): + def end(self) -> DateIsh: """ Get date or datetime object @@ -261,15 +353,15 @@ def end(self): """ return self._end - def is_active(self): + def is_active(self) -> bool: """Return true if the date is active""" return self._active - def has_end(self): + def has_end(self) -> bool: """Return true if it has the end date""" return bool(self._end) - def has_time(self): + def has_time(self) -> bool: """ Return true if the start date has time field @@ -281,7 +373,7 @@ def has_time(self): """ return isinstance(self._start, datetime.datetime) - def has_overlap(self, other): + def has_overlap(self, other) -> bool: """ Test if it has overlap with other :class:`OrgDate` instance @@ -303,11 +395,10 @@ def has_overlap(self, other): if not isinstance(other, OrgDate): other = OrgDate(other) if self.has_end(): - return (self._datetime_in_range(other.start) or - self._datetime_in_range(other.end)) + return 
self._datetime_in_range(other.start) or self._datetime_in_range(other.end) elif other.has_end(): return other._datetime_in_range(self.start) - elif self.start == other.get_start: + elif self.start == other.start: return True else: return False @@ -321,18 +412,32 @@ def _datetime_in_range(self, date): return False @staticmethod - def _as_datetime(date): - if isinstance(date, datetime.date): + def _as_datetime(date) -> datetime.datetime: + """ + Convert the given date into datetime (if it already is, return it + unmodified) + """ + if not isinstance(date, datetime.datetime): return datetime.datetime(*date.timetuple()[:3]) return date @staticmethod - def _datetuple_from_groupdict(dct, prefix=''): - keys = ['year', 'month', 'day', 'hour', 'min'] - return list(map(int, filter(None, (dct[prefix + k] for k in keys)))) + def _daterange_from_groupdict(dct, prefix='') -> tuple[list, Optional[list]]: + start_keys = ['year', 'month', 'day', 'hour' , 'min'] # fmt: skip + end_keys = ['year', 'month', 'day', 'end_hour', 'end_min'] # fmt: skip + start_range = list(map(int, filter(None, (dct[prefix + k] for k in start_keys)))) + end_range: Optional[list] + end_range = list(map(int, filter(None, (dct[prefix + k] for k in end_keys)))) + if len(end_range) < len(end_keys): + end_range = None + return (start_range, end_range) + + @classmethod + def _datetuple_from_groupdict(cls, dct, prefix=''): + return cls._daterange_from_groupdict(dct, prefix=prefix)[0] @classmethod - def list_from_str(cls, string): + def list_from_str(cls, string: str) -> list[OrgDate]: """ Parse string and return a list of :class:`OrgDate` objects @@ -344,11 +449,13 @@ def list_from_str(cls, string): [OrgDate((2012, 2, 10)), OrgDate((2012, 2, 12), None, False)] >>> OrgDate.list_from_str("this is not timestamp") [] - + >>> OrgDate.list_from_str("<2012-02-11 Sat 10:11--11:20>") + [OrgDate((2012, 2, 11, 10, 11, 0), (2012, 2, 11, 11, 20, 0))] """ + cookie_suffix = ['pre', 'num', 'dwmy'] match = TIMESTAMP_RE.search(string) if match: - rest = string[match.end():] + rest = string[match.end() :] mdict = match.groupdict() if mdict['active_year']: prefix = 'active_' active = True rangedash = '--<' else: prefix = 'inactive_' active = False rangedash = '--[' + repeater: Optional[tuple[str, int, str]] = None + warning: Optional[tuple[str, int, str]] = None + if mdict[prefix + 'repeatpre'] is not None: + keys = [prefix + 'repeat' + suffix for suffix in cookie_suffix] + values = [mdict[k] for k in keys] + repeater = (values[0], int(values[1]), values[2]) + if mdict[prefix + 'warnpre'] is not None: + keys = [prefix + 'warn' + suffix for suffix in cookie_suffix] + values = [mdict[k] for k in keys] + warning = (values[0], int(values[1]), values[2]) has_rangedash = rest.startswith(rangedash) match2 = TIMESTAMP_RE.search(rest) if has_rangedash else None if has_rangedash and match2: - rest = rest[match2.end():] + rest = rest[match2.end() :] # no need for check activeness here because of the rangedash mdict2 = match2.groupdict() odate = cls( cls._datetuple_from_groupdict(mdict, prefix), cls._datetuple_from_groupdict(mdict2, prefix), - active=active) + active=active, + repeater=repeater, + warning=warning, + ) else: odate = cls( - cls._datetuple_from_groupdict(mdict, prefix), - active=active) - # FIXME: treat "repeater" and "warn" - return [odate] + cls.list_from_str(rest) + *cls._daterange_from_groupdict(mdict, prefix), active=active, repeater=repeater, warning=warning + ) + return [odate, *cls.list_from_str(rest)] else: return [] @classmethod -
def from_str(cls, string): + def from_str(cls, string: str) -> OrgDate: """ Parse string and return an :class:`OrgDate` objects. @@ -391,8 +510,7 @@ def from_str(cls, string): match = cls._from_str_re.match(string) if match: mdict = match.groupdict() - return cls(cls._datetuple_from_groupdict(mdict), - active=cls._active_default) + return cls(cls._datetuple_from_groupdict(mdict), active=cls._active_default) else: return cls(None) @@ -402,54 +520,77 @@ def from_str(cls, string): def compile_sdc_re(sdctype): brtype = 'inactive' if sdctype == 'CLOSED' else 'active' return re.compile( - r'{0}:\s+{1}'.format( + r'^(?!\#).*{}:\s+{}'.format( sdctype, - gene_timestamp_regex(brtype, prefix='', nocookie=True)), - re.VERBOSE) + gene_timestamp_regex(brtype, prefix='', nocookie=True), + ), + re.VERBOSE, + ) class OrgDateSDCBase(OrgDate): - _re = None # override this! # FIXME: use OrgDate.from_str @classmethod def from_str(cls, string): - match = cls._re.search(string) + rgx = cls._re + assert rgx is not None + match = rgx.search(string) if match: mdict = match.groupdict() - return cls(cls._datetuple_from_groupdict(mdict), - active=cls._active_default) + start = cls._datetuple_from_groupdict(mdict) + end = None + end_hour = mdict['end_hour'] + end_min = mdict['end_min'] + if end_hour is not None and end_min is not None: + end_dict = {} + end_dict.update(mdict) + end_dict.update({'hour': end_hour, 'min': end_min}) + end = cls._datetuple_from_groupdict(end_dict) + cookie_suffix = ['pre', 'num', 'dwmy'] + repeater: Optional[tuple[str, int, str]] = None + warning: Optional[tuple[str, int, str]] = None + prefix = '' + if mdict[prefix + 'repeatpre'] is not None: + keys = [prefix + 'repeat' + suffix for suffix in cookie_suffix] + values = [mdict[k] for k in keys] + repeater = (values[0], int(values[1]), values[2]) + if mdict[prefix + 'warnpre'] is not None: + keys = [prefix + 'warn' + suffix for suffix in cookie_suffix] + values = [mdict[k] for k in keys] + warning = (values[0], int(values[1]), values[2]) + return cls(start, end, active=cls._active_default, repeater=repeater, warning=warning) else: return cls(None) class OrgDateScheduled(OrgDateSDCBase): """Date object to represent SCHEDULED attribute.""" + _re = compile_sdc_re('SCHEDULED') _active_default = True class OrgDateDeadline(OrgDateSDCBase): """Date object to represent DEADLINE attribute.""" + _re = compile_sdc_re('DEADLINE') _active_default = True class OrgDateClosed(OrgDateSDCBase): """Date object to represent CLOSED attribute.""" + _re = compile_sdc_re('CLOSED') _active_default = False def parse_sdc(string): - return (OrgDateScheduled.from_str(string), - OrgDateDeadline.from_str(string), - OrgDateClosed.from_str(string)) + return (OrgDateScheduled.from_str(string), OrgDateDeadline.from_str(string), OrgDateClosed.from_str(string)) class OrgDateClock(OrgDate): - """ Date object to represent CLOCK attributes. @@ -461,11 +602,13 @@ class OrgDateClock(OrgDate): _active_default = False - def __init__(self, start, end, duration=None, active=None): + _allow_short_range = False + + def __init__(self, start, end=None, duration=None, active=None): """ Create OrgDateClock object """ - super(OrgDateClock, self).__init__(start, end, active=active) + super().__init__(start, end, active=active) self._duration = duration @property @@ -476,8 +619,8 @@ def duration(self): >>> duration = OrgDateClock.from_str( ... 'CLOCK: [2010-08-08 Sun 17:00]--[2010-08-08 Sun 17:30] => 0:30' ... 
).duration - >>> duration - datetime.timedelta(0, 1800) + >>> duration.seconds + 1800 >>> total_minutes(duration) 30.0 @@ -498,11 +641,10 @@ def is_duration_consistent(self): False """ - return (self._duration is None or - self._duration == total_minutes(self.duration)) + return self._duration is None or self._duration == total_minutes(self.duration) @classmethod - def from_str(cls, line): + def from_str(cls, line: str) -> OrgDateClock: """ Get CLOCK from given string. @@ -513,51 +655,67 @@ def from_str(cls, line): match = cls._re.search(line) if not match: return cls(None, None) - groups = [int(d) for d in match.groups()] - ymdhm1 = groups[:5] - ymdhm2 = groups[5:10] - hm3 = groups[10:] + + ymdhm1 = [int(d) for d in match.groups()[:5]] + + # second part starting with "--", does not exist for open clock dates + has_end = bool(match.group(6)) + ymdhm2_dt: Optional[datetime.datetime] + len_min: Optional[int] + if has_end: + ymdhm2 = [int(d) for d in match.groups()[6:11]] + hm3 = [int(d) for d in match.groups()[11:]] + + ymdhm2_dt = datetime.datetime(*ymdhm2) # type: ignore[arg-type] + len_min = hm3[0] * 60 + hm3[1] + else: + ymdhm2_dt = None + len_min = None + return cls( - datetime.datetime(*ymdhm1), - datetime.datetime(*ymdhm2), - hm3[0] * 60 + hm3[1], + datetime.datetime(*ymdhm1), # type: ignore[arg-type] + ymdhm2_dt, + len_min, ) _re = re.compile( - r'CLOCK:\s+' - r'\[(\d+)\-(\d+)\-(\d+)[^\]\d]*(\d+)\:(\d+)\]--' - r'\[(\d+)\-(\d+)\-(\d+)[^\]\d]*(\d+)\:(\d+)\]\s+=>\s+(\d+)\:(\d+)' - ) + r'^(?!#).*CLOCK:\s+' + r'\[(\d+)\-(\d+)\-(\d+)[^\]\d]*(\d+)\:(\d+)\]' + r'(--\[(\d+)\-(\d+)\-(\d+)[^\]\d]*(\d+)\:(\d+)\]\s+=>\s+(\d+)\:(\d+))?' + ) class OrgDateRepeatedTask(OrgDate): - """ Date object to represent repeated tasks. """ _active_default = False - def __init__(self, start, before, after, active=None): - super(OrgDateRepeatedTask, self).__init__(start, active=active) + def __init__(self, start, before: str, after: str, active=None) -> None: + super().__init__(start, active=active) self._before = before self._after = after - def __repr__(self): - args = [self._date_to_tuple(self.start), self.before, self.after] + def __repr__(self) -> str: + args: list = [self._date_to_tuple(self.start), self.before, self.after] if self._active is not self._active_default: args.append(self._active) - return '{0}({1})'.format( - self.__class__.__name__, ', '.join(map(repr, args))) + return '{}({})'.format(self.__class__.__name__, ', '.join(map(repr, args))) + + def __hash__(self) -> int: + return hash((self._before, self._after)) - def __eq__(self, other): - return super(OrgDateRepeatedTask, self).__eq__(other) and \ - isinstance(other, self.__class__) and \ - self._before == other._before and \ - self._after == other._after + def __eq__(self, other) -> bool: + return ( + super().__eq__(other) + and isinstance(other, self.__class__) + and self._before == other._before + and self._after == other._after + ) @property - def before(self): + def before(self) -> str: """ The state of task before marked as done. @@ -569,7 +727,7 @@ def before(self): return self._before @property - def after(self): + def after(self) -> str: """ The state of task after marked as done. 
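The reworked date handling above is easiest to check end-to-end. A minimal sketch of the new behaviour, assuming only what the code above shows (per the tuple construction in list_from_str, a repeater cookie such as '+1m' is captured as ('+', 1, 'm'); the cookie regex itself lives in gene_timestamp_regex, which is not part of this excerpt):

    from orgparse.date import OrgDate, OrgDateClock, parse_sdc

    # Single-day time ranges such as <... 10:11--11:20> now parse into
    # start/end datetimes (see the list_from_str doctest above).
    [rng] = OrgDate.list_from_str("<2012-02-11 Sat 10:11--11:20>")
    assert rng.has_end()

    # SCHEDULED/DEADLINE/CLOSED are extracted from a planning line in one
    # pass; absent entries come back as empty (falsy) date objects.
    scheduled, deadline, closed = parse_sdc("SCHEDULED: <2012-02-26 Sun> CLOSED: [2012-02-26 Sun 21:15]")
    assert scheduled and closed and not deadline

    # Open clock lines (no closing timestamp) are now accepted; the end
    # date and the duration are simply None.
    open_clock = OrgDateClock.from_str("CLOCK: [2012-10-26 Fri 16:01]")
    assert not open_clock.has_end()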
diff --git a/src/orgparse/extra.py b/src/orgparse/extra.py new file mode 100644 index 0000000..e89343e --- /dev/null +++ b/src/orgparse/extra.py @@ -0,0 +1,122 @@ +from __future__ import annotations + +import re +from collections.abc import Iterator, Sequence +from typing import Optional, Union + +RE_TABLE_SEPARATOR = re.compile(r'\s*\|(\-+\+)*\-+\|') +RE_TABLE_ROW = re.compile(r'\s*\|([^|]+)+\|') +STRIP_CELL_WHITESPACE = True + + +Row = Sequence[str] + + +class Table: + def __init__(self, lines: list[str]) -> None: + self._lines = lines + + @property + def blocks(self) -> Iterator[Sequence[Row]]: + group: list[Row] = [] + first = True + for r in self._pre_rows(): + if r is None: + if not first or len(group) > 0: + yield group + first = False + group = [] + else: + group.append(r) + if len(group) > 0: + yield group + + def __iter__(self) -> Iterator[Row]: + return self.rows + + @property + def rows(self) -> Iterator[Row]: + for r in self._pre_rows(): + if r is not None: + yield r + + def _pre_rows(self) -> Iterator[Optional[Row]]: + for l in self._lines: + if RE_TABLE_SEPARATOR.match(l): + yield None + else: + pr = l.strip().strip('|').split('|') + if STRIP_CELL_WHITESPACE: + pr = [x.strip() for x in pr] + yield pr + # TODO use iparse helper? + + @property + def as_dicts(self) -> AsDictHelper: + bl = list(self.blocks) + if len(bl) != 2: + raise RuntimeError('Need two-block table to non-ambiguously guess column names') + hrows = bl[0] + if len(hrows) != 1: + raise RuntimeError(f'Need single row heading to guess column names, got: {hrows}') + columns = hrows[0] + assert len(set(columns)) == len(columns), f'Duplicate column names: {columns}' + return AsDictHelper( + columns=columns, + rows=bl[1], + ) + + +class AsDictHelper: + def __init__(self, columns: Sequence[str], rows: Sequence[Row]) -> None: + self.columns = columns + self._rows = rows + + def __iter__(self) -> Iterator[dict[str, str]]: + for x in self._rows: + yield dict(zip(self.columns, x)) + + +class Gap: + # todo later, add indices etc + pass + + +Rich = Union[Table, Gap] + + +def to_rich_text(text: str) -> Iterator[Rich]: + ''' + Convert an org-mode text into a 'rich' text, e.g. tables/lists/etc, interleaved by gaps. + NOTE: you shouldn't rely on the number of items returned by this function, + it might change in the future when more types are supported. + + At the moment only tables are supported. 
+ ''' + lines = text.splitlines(keepends=True) + group: list[str] = [] + last: type[Rich] = Gap + + def emit() -> Rich: + nonlocal group, last + if last is Gap: + res = Gap() + elif last is Table: + res = Table(group) # type: ignore[assignment] + else: + raise RuntimeError(f'Unexpected type {last}') + group = [] + return res + + for line in lines: + if RE_TABLE_ROW.match(line) or RE_TABLE_SEPARATOR.match(line): + cur = Table + else: + cur = Gap # type: ignore[assignment] + if cur is not last: + if len(group) > 0: + yield emit() + last = cur + group.append(line) + if len(group) > 0: + yield emit() diff --git a/orgparse/inline.py b/src/orgparse/inline.py similarity index 87% rename from orgparse/inline.py rename to src/orgparse/inline.py index 043c99d..a2057fc 100644 --- a/orgparse/inline.py +++ b/src/orgparse/inline.py @@ -25,9 +25,7 @@ def to_plain_text(org_text): See also: info:org#Link format """ - return RE_LINK.sub( - lambda m: m.group('desc0') or m.group('desc1'), - org_text) + return RE_LINK.sub(lambda m: m.group('desc0') or m.group('desc1'), org_text) RE_LINK = re.compile( @@ -45,4 +43,5 @@ def to_plain_text(org_text): \] \] ) """, - re.VERBOSE) + re.VERBOSE, +) diff --git a/orgparse/node.py b/src/orgparse/node.py similarity index 64% rename from orgparse/node.py rename to src/orgparse/node.py index bd3afc3..5794b43 100644 --- a/orgparse/node.py +++ b/src/orgparse/node.py @@ -1,17 +1,30 @@ -import re -import itertools -try: - from collections import Sequence -except ImportError: - from collections.abc import Sequence +from __future__ import annotations -from .date import OrgDate, OrgDateClock, OrgDateRepeatedTask, parse_sdc +import itertools +import re +from collections.abc import Iterable, Iterator, Sequence +from typing import ( + Any, + Optional, + Union, + cast, +) + +from .date import ( + OrgDate, + OrgDateClock, + OrgDateClosed, + OrgDateDeadline, + OrgDateRepeatedTask, + OrgDateScheduled, + parse_sdc, +) +from .extra import Rich, to_rich_text from .inline import to_plain_text -from .utils.py3compat import PY3, unicode -def lines_to_chunks(lines): - chunk = [] +def lines_to_chunks(lines: Iterable[str]) -> Iterable[list[str]]: + chunk: list[str] = [] for l in lines: if RE_NODE_HEADER.search(l): yield chunk @@ -19,10 +32,11 @@ def lines_to_chunks(lines): chunk.append(l) yield chunk + RE_NODE_HEADER = re.compile(r"^\*+ ") -def parse_heading_level(heading): +def parse_heading_level(heading: str) -> tuple[str, int] | None: """ Get star-stripped heading and its level @@ -30,17 +44,21 @@ def parse_heading_level(heading): ('Heading', 1) >>> parse_heading_level('******** Heading') ('Heading', 8) + >>> parse_heading_level('*') # None since no space after star + >>> parse_heading_level('*bold*') # None >>> parse_heading_level('not heading') # None """ - match = RE_HEADING_STARS.search(heading) - if match: - return (match.group(2), len(match.group(1))) + m = RE_HEADING_STARS.search(heading) + if m is not None: + return (m.group(2), len(m.group(1))) + return None + -RE_HEADING_STARS = re.compile('^(\*+)\s*(.*?)\s*$') +RE_HEADING_STARS = re.compile(r'^(\*+)\s+(.*?)\s*$') -def parse_heading_tags(heading): +def parse_heading_tags(heading: str) -> tuple[str, list[str]]: """ Get first tags and heading without tags @@ -71,10 +89,12 @@ def parse_heading_tags(heading): tags = [] return (heading, tags) -RE_HEADING_TAGS = re.compile(r'(.*?)\s*:([a-zA-Z0-9@_:]+):\s*$') +# Tags are normal words containing letters, numbers, '_', and '@'. 
https://orgmode.org/manual/Tags.html +RE_HEADING_TAGS = re.compile(r'(.*?)\s*:([\w@:]+):\s*$') -def parse_heading_todos(heading, todo_candidates): + +def parse_heading_todos(heading: str, todo_candidates: list[str]) -> tuple[str, Optional[str]]: """ Get TODO keyword and heading without TODO keyword. @@ -86,15 +106,16 @@ def parse_heading_todos(heading, todo_candidates): """ for todo in todo_candidates: - todows = '{0} '.format(todo) - if heading.startswith(todows): - return (heading[len(todows):], todo) + if heading == todo: + return ('', todo) + if heading.startswith(todo + ' '): + return (heading[len(todo) + 1 :], todo) return (heading, None) def parse_heading_priority(heading): """ - Get priority and heading without priority field.. + Get priority and heading without priority field. >>> parse_heading_priority('HEADING') ('HEADING', None) @@ -112,10 +133,13 @@ def parse_heading_priority(heading): else: return (heading, None) + RE_HEADING_PRIORITY = re.compile(r'^\s*\[#([A-Z0-9])\] ?(.*)$') +PropertyValue = Union[str, int, float] + -def parse_property(line): +def parse_property(line: str) -> tuple[Optional[str], Optional[PropertyValue]]: """ Get property from given string. @@ -126,33 +150,165 @@ def parse_property(line): """ prop_key = None - prop_val = None + prop_val: Optional[Union[str, int, float]] = None match = RE_PROP.search(line) if match: prop_key = match.group(1) prop_val = match.group(2) if prop_key == 'Effort': - (h, m) = prop_val.split(":", 2) - if h.isdigit() and m.isdigit(): - prop_val = int(h) * 60 + int(m) + prop_val = parse_duration_to_minutes(prop_val) return (prop_key, prop_val) -RE_PROP = re.compile('^\s*:(.*?):\s*(.*?)\s*$') +RE_PROP = re.compile(r'^\s*:(.*?):\s*(.*?)\s*$') + + +def parse_duration_to_minutes(duration: str) -> Union[float, int]: + """ + Parse duration minutes from given string. + Convert to integer if number has no decimal points + + >>> parse_duration_to_minutes('3:12') + 192 + >>> parse_duration_to_minutes('1:23:45') + 83.75 + >>> parse_duration_to_minutes('1y 3d 3h 4min') + 530464 + >>> parse_duration_to_minutes('1d3h5min') + 1625 + >>> parse_duration_to_minutes('3d 13:35') + 5135 + >>> parse_duration_to_minutes('2.35h') + 141 + >>> parse_duration_to_minutes('10') + 10 + >>> parse_duration_to_minutes('10.') + 10 + >>> parse_duration_to_minutes('1 h') + 60 + >>> parse_duration_to_minutes('') + 0 + """ + + minutes = parse_duration_to_minutes_float(duration) + return int(minutes) if minutes.is_integer() else minutes -def parse_comment(line): + +def parse_duration_to_minutes_float(duration: str) -> float: + """ + Parse duration minutes from given string. 
+ The following code is fully compatible with the 'org-duration-to-minutes' function in org mode: + https://github.com/emacs-mirror/emacs/blob/master/lisp/org/org-duration.el + + >>> parse_duration_to_minutes_float('3:12') + 192.0 + >>> parse_duration_to_minutes_float('1:23:45') + 83.75 + >>> parse_duration_to_minutes_float('1y 3d 3h 4min') + 530464.0 + >>> parse_duration_to_minutes_float('1d3h5min') + 1625.0 + >>> parse_duration_to_minutes_float('3d 13:35') + 5135.0 + >>> parse_duration_to_minutes_float('2.35h') + 141.0 + >>> parse_duration_to_minutes_float('10') + 10.0 + >>> parse_duration_to_minutes_float('10.') + 10.0 + >>> parse_duration_to_minutes_float('1 h') + 60.0 + >>> parse_duration_to_minutes_float('') + 0.0 + """ + + match: Optional[Any] + if duration == "": + return 0.0 + if isinstance(duration, float): + return float(duration) + if RE_ORG_DURATION_H_MM.fullmatch(duration): + hours, minutes, *seconds_ = map(float, duration.split(":")) + seconds = seconds_[0] if seconds_ else 0 + return seconds / 60.0 + minutes + 60 * hours + if RE_ORG_DURATION_FULL.fullmatch(duration): + minutes = 0 + for match in RE_ORG_DURATION_UNIT.finditer(duration): + value = float(match.group(1)) + unit = match.group(2) + minutes += value * ORG_DURATION_UNITS[unit] + return float(minutes) + match = RE_ORG_DURATION_MIXED.fullmatch(duration) + if match: + units_part = match.groupdict()['A'] + hms_part = match.groupdict()['B'] + return parse_duration_to_minutes_float(units_part) + parse_duration_to_minutes_float(hms_part) + if RE_FLOAT.fullmatch(duration): + return float(duration) + raise ValueError(f"Invalid duration format {duration}") + + +# Conversion factor to minutes for a duration. +ORG_DURATION_UNITS = { + "min": 1, + "h": 60, + "d": 60 * 24, + "w": 60 * 24 * 7, + "m": 60 * 24 * 30, + "y": 60 * 24 * 365.25, +} +# Regexp matching for all units. +ORG_DURATION_UNITS_RE = r'({})'.format(r'|'.join(ORG_DURATION_UNITS.keys())) +# Regexp matching a duration expressed with H:MM or H:MM:SS format. +# Hours can use any number of digits. +ORG_DURATION_H_MM_RE = r'[ \t]*[0-9]+(?::[0-9]{2}){1,2}[ \t]*' +RE_ORG_DURATION_H_MM = re.compile(ORG_DURATION_H_MM_RE) +# Regexp matching a duration with a unit. +# Allowed units are defined in ORG_DURATION_UNITS. +# Match group 1 contains the bare number. +# Match group 2 contains the unit. +ORG_DURATION_UNIT_RE = r'([0-9]+(?:[.][0-9]*)?)[ \t]*' + ORG_DURATION_UNITS_RE +RE_ORG_DURATION_UNIT = re.compile(ORG_DURATION_UNIT_RE) +# Regexp matching a duration expressed with units. +# Allowed units are defined in ORG_DURATION_UNITS. +ORG_DURATION_FULL_RE = rf'(?:[ \t]*{ORG_DURATION_UNIT_RE})+[ \t]*' +RE_ORG_DURATION_FULL = re.compile(ORG_DURATION_FULL_RE) +# Regexp matching a duration expressed with units and H:MM or H:MM:SS format. +# Allowed units are defined in ORG_DURATION_UNITS. +# Match group A contains units part. +# Match group B contains H:MM or H:MM:SS part. +ORG_DURATION_MIXED_RE = rf'(?P<A>([ \t]*{ORG_DURATION_UNIT_RE})+)[ \t]*(?P<B>[0-9]+(?::[0-9][0-9]){{1,2}})[ \t]*' +RE_ORG_DURATION_MIXED = re.compile(ORG_DURATION_MIXED_RE) +# Regexp matching float numbers. +RE_FLOAT = re.compile(r'[0-9]+([.][0-9]*)?') + + +# -> Optional[Tuple[str, Sequence[str]]]: # todo wtf?? it says 'ABCMeta isn't subscriptable??'
+def parse_comment(line: str): """ Parse special comment such as ``#+SEQ_TODO`` >>> parse_comment('#+SEQ_TODO: TODO | DONE') - ('SEQ_TODO', 'TODO | DONE') + ('SEQ_TODO', ['TODO | DONE']) >>> parse_comment('# not a special comment') # None + >>> parse_comment('#+FILETAGS: :tag1:tag2:') + ('FILETAGS', ['tag1', 'tag2']) """ - if line.startswith('#+'): - comment = line.lstrip('#+').split(':', 1) - if len(comment) == 2: - return (comment[0], comment[1].strip()) + match = re.match(r'\s*#\+', line) + if match: + end = match.end(0) + comment = line[end:].split(':', maxsplit=1) + if len(comment) >= 2: + key = comment[0] + value = comment[1].strip() + if key.upper() == 'FILETAGS': + # just legacy behaviour; it seems like filetags is the only one that separated by ':' + # see https://orgmode.org/org.html#In_002dbuffer-Settings + return (key, [c.strip() for c in value.split(':') if len(c.strip()) > 0]) + else: + return (key, [value]) + return None def parse_seq_todo(line): @@ -180,26 +336,35 @@ def parse_seq_todo(line): else: (todos, dones) = (line, '') strip_fast_access_key = lambda x: x.split('(', 1)[0] - return (list(map(strip_fast_access_key, todos.split())), - list(map(strip_fast_access_key, dones.split()))) - + return ( + list(map(strip_fast_access_key, todos.split())), + list(map(strip_fast_access_key, dones.split())), + ) -class OrgEnv(object): +class OrgEnv: """ Information global to the file (e.g, TODO keywords). """ - def __init__(self, todos=['TODO'], dones=['DONE'], - filename=''): + def __init__( + self, + todos: Sequence[str] | None = None, + dones: Sequence[str] | None = None, + filename: str = '', + ) -> None: + if dones is None: + dones = ['DONE'] + if todos is None: + todos = ['TODO'] self._todos = list(todos) self._dones = list(dones) self._todo_not_specified_in_comment = True self._filename = filename - self._nodes = [] + self._nodes: list[OrgBaseNode] = [] @property - def nodes(self): + def nodes(self) -> list[OrgBaseNode]: """ A list of org nodes. @@ -265,15 +430,12 @@ def all_todo_keys(self): return self._todos + self._dones @property - def filename(self): + def filename(self) -> str: """ Return a path to the source file or similar information. If the org objects are not loaded from a file, this value will be a string of the form ````. - - :rtype: str - """ return self._filename @@ -286,7 +448,6 @@ def from_chunks(self, chunks): class OrgBaseNode(Sequence): - """ Base class for :class:`OrgRootNode` and :class:`OrgNode` @@ -323,7 +484,7 @@ class OrgBaseNode(Sequence): *** Heading 3 * Heading 4 - It also support sequence protocol. + It also supports sequence protocol. >>> print(root[1]) * Heading 1 @@ -340,20 +501,24 @@ class OrgBaseNode(Sequence): ** Heading 2 *** Heading 3 - """ + Nodes remember the line number information (1-indexed): - def __init__(self, env, index=None): - """ - Create a :class:`OrgBaseNode` object. + >>> print(root.children[1].linenumber) + 5 + """ - :type env: :class:`OrgEnv` - :arg env: This will be set to the :attr:`env` attribute. + _body_lines: list[str] # set by the child classes - """ + def __init__(self, env: OrgEnv, index: int | None = None) -> None: self.env = env + self.linenumber = cast(int, None) # set in parse_lines + # content - self._lines = [] + self._lines: list[str] = [] + + self._properties: dict[str, PropertyValue] = {} + self._timestamps: list[OrgDate] = [] # FIXME: use `index` argument to set index. (Currently it is # done externally in `parse_lines`.) 
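The Table helper introduced in extra.py above ships without doctests; a short usage sketch with made-up table contents:

    from orgparse.extra import Table

    tbl = Table([
        '| name  | age |',
        '|-------+-----|',
        '| alice |  10 |',
        '| bob   |  20 |',
    ])
    # .rows skips separator lines and strips per-cell whitespace
    assert [list(r) for r in tbl.rows] == [['name', 'age'], ['alice', '10'], ['bob', '20']]
    # .as_dicts requires exactly two blocks: a one-row header, then the data
    assert list(tbl.as_dicts) == [
        {'name': 'alice', 'age': '10'},
        {'name': 'bob', 'age': '20'},
    ]

In a parsed file the same Table objects are produced by to_rich_text, and hence by OrgBaseNode.body_rich further down, interleaved with Gap markers for the surrounding non-table text.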
@@ -374,45 +539,45 @@ def __iter__(self): yield self level = self.level - for node in self.env._nodes[self._index + 1:]: + for node in self.env._nodes[self._index + 1 :]: if node.level > level: yield node + else: + break - def __len__(self): + def __len__(self) -> int: return sum(1 for _ in self) - def __nonzero__(self): + def __bool__(self) -> bool: # As self.__len__ always returns a non-zero value, this is not # needed. This function is only for performance. return True - __bool__ = __nonzero__ # PY3 - def __getitem__(self, key): if isinstance(key, slice): return itertools.islice(self, key.start, key.stop, key.step) elif isinstance(key, int): if key < 0: key += len(self) - for (i, node) in enumerate(self): + for i, node in enumerate(self): if i == key: return node - raise IndexError("Out of range {0}".format(key)) + raise IndexError(f"Out of range {key}") else: - raise TypeError("Inappropriate type {0} for {1}" - .format(type(key), type(self))) + raise TypeError(f"Inappropriate type {type(key)} for {type(self)}") # tree structure - def _find_same_level(self, iterable) -> OrgBaseNode | None: for node in iterable: if node.level < self.level: - return + return None if node.level == self.level: return node + return None @property - def previous_same_level(self) -> OrgBaseNode | None: """ Return previous node if exists or None otherwise. @@ -431,10 +596,10 @@ def previous_same_level(self): True """ - return self._find_same_level(reversed(self.env._nodes[:self._index])) + return self._find_same_level(reversed(self.env._nodes[: self._index])) @property - def next_same_level(self) -> OrgBaseNode | None: """ Return next node if exists or None otherwise. @@ -453,15 +618,16 @@ def next_same_level(self): True """ - return self._find_same_level(self.env._nodes[self._index + 1:]) + return self._find_same_level(self.env._nodes[self._index + 1 :]) # FIXME: cache parent node def _find_parent(self): - for node in reversed(self.env._nodes[:self._index]): + for node in reversed(self.env._nodes[: self._index]): if node.level < self.level: return node + return None - def get_parent(self, max_level=None): + def get_parent(self, max_level: int | None = None): """ Return a parent node. @@ -470,9 +636,9 @@ def get_parent(self, max_level=None): of the ancestor node to return. For example, ``get_parent(max_level=0)`` returns a root node. - In general case, it specify a maximum level of the + In the general case, it specifies a maximum level of the desired ancestor node. If there is no ancestor node - which level is equal to ``max_level``, this function + whose level is equal to ``max_level``, this function tries to find an ancestor node whose level is smaller than ``max_level``.
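The sequence protocol and the same-level accessors combine into simple tree walking. A sketch with hypothetical headings, relying on the max_level semantics documented above:

    from orgparse import loads

    root = loads('''
* A
** B
*** C
* D
''')
    a, b, c, d = root[1], root[2], root[3], root[4]
    # iterating over a node yields the node itself plus its whole subtree
    assert [n.heading for n in a] == ['A', 'B', 'C']
    assert c.get_parent() is b
    assert c.get_parent(max_level=1) is a  # nearest ancestor at level <= 1
    assert a.next_same_level is d
    assert d.previous_same_level is a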
@@ -547,8 +713,11 @@ def parent(self): # FIXME: cache children nodes def _find_children(self): - nodeiter = iter(self.env._nodes[self._index + 1:]) - node = next(nodeiter) + nodeiter = iter(self.env._nodes[self._index + 1 :]) + try: + node = next(nodeiter) + except StopIteration: + return if node.level <= self.level: return yield node @@ -579,7 +748,7 @@ def children(self): >>> c2 is n4 True - Note the difference to ``n1[1:]``, which returns the Node 3 also.: + Note the difference to ``n1[1:]``, which returns the Node 3 also: >>> (m1, m2, m3) = list(n1[1:]) >>> m2 is n3 @@ -607,6 +776,37 @@ def root(self): return root root = parent + @property + def properties(self) -> dict[str, PropertyValue]: + """ + Node properties as a dictionary. + + >>> from orgparse import loads + >>> root = loads(''' + ... * Node + ... :PROPERTIES: + ... :SomeProperty: value + ... :END: + ... ''') + >>> root.children[0].properties['SomeProperty'] + 'value' + + """ + return self._properties + + def get_property(self, key, val=None) -> Optional[PropertyValue]: + """ + Return property named ``key`` if exists or ``val`` otherwise. + + :arg str key: + Key of property. + + :arg val: + Default value to return. + + """ + return self._properties.get(key, val) + # parser @classmethod @@ -617,46 +817,59 @@ def from_chunk(cls, env, lines): return self def _parse_comments(self): - special_comments = {} + special_comments: dict[str, list[str]] = {} for line in self._lines: parsed = parse_comment(line) if parsed: - (key, val) = parsed - special_comments.setdefault(key, []).append(val) + (key, vals) = parsed + key = key.upper() # case insensitive, so keep as uppercase + special_comments.setdefault(key, []).extend(vals) self._special_comments = special_comments # parse TODO keys and store in OrgEnv for todokey in ['TODO', 'SEQ_TODO', 'TYP_TODO']: for val in special_comments.get(todokey, []): self.env.add_todo_keys(*parse_seq_todo(val)) + def _iparse_properties(self, ilines: Iterator[str]) -> Iterator[str]: + self._properties = {} + in_property_field = False + for line in ilines: + if in_property_field: + if line.find(":END:") >= 0: + break + else: + (key, val) = parse_property(line) + if key is not None and val is not None: + self._properties.update({key: val}) + elif line.find(":PROPERTIES:") >= 0: + in_property_field = True + else: + yield line + for line in ilines: + yield line + # misc @property - def level(self): + def level(self) -> int: """ Level of this node. - - :rtype: int - """ - raise NotImplemented + raise NotImplementedError - def _get_tags(self, inher=False): + def _get_tags(self, *, inher: bool = False) -> set[str]: # noqa: ARG002 """ Return tags - :arg bool inher: + :arg inher: Mix with tags of all ancestor nodes if ``True``. - - :rtype: set - """ return set() @property - def tags(self): + def tags(self) -> set[str]: """ - Tag of this and parents node. + Tags of this and parent's node. >>> from orgparse import loads >>> n2 = loads(''' @@ -670,7 +883,7 @@ def tags(self): return self._get_tags(inher=True) @property - def shallow_tags(self): + def shallow_tags(self) -> set[str]: """ Tags defined for this node (don't look-up parent nodes). 
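Property parsing and tag inheritance interact as documented above; a small sketch with made-up headings (Effort values are normalised to minutes via parse_duration_to_minutes):

    from orgparse import loads

    root = loads('''
* Parent :parenttag:
  :PROPERTIES:
  :Effort: 1:10
  :END:
** Child :childtag:
''')
    parent = root.children[0]
    child = parent.children[0]
    assert parent.properties == {'Effort': 70}       # 1:10 -> 70 minutes
    assert parent.get_property('Missing', 42) == 42  # fallback for absent keys
    assert child.shallow_tags == {'childtag'}        # the node's own tags only
    assert child.tags == {'childtag', 'parenttag'}   # ancestor tags mixed in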
@@ -685,6 +898,40 @@ def shallow_tags(self): """ return self._get_tags(inher=False) + @staticmethod + def _get_text(text, format: str = 'plain'): # noqa: A002 + if format == 'plain': + return to_plain_text(text) + elif format == 'raw': + return text + elif format == 'rich': + return to_rich_text(text) + else: + raise ValueError(f'format={format} is not supported.') + + def get_body(self, format: str = 'plain') -> str: # noqa: A002 + """ + Return a string of body text. + + See also: :meth:`get_heading`. + + """ + return self._get_text('\n'.join(self._body_lines), format) if self._lines else '' + + @property + def body(self) -> str: + """Alias of ``.get_body(format='plain')``.""" + return self.get_body() + + @property + def body_rich(self) -> Iterator[Rich]: + r = self.get_body(format='rich') + return cast(Iterator[Rich], r) # meh.. + + @property + def heading(self) -> str: + raise NotImplementedError + def is_root(self): """ Return ``True`` when it is a root node. @@ -700,42 +947,180 @@ def is_root(self): """ return False - def __unicode__(self): - return unicode("\n").join(self._lines) + def get_timestamps(self, active=False, inactive=False, range=False, point=False): # noqa: FBT002,A002 # will fix later + """ + Return a list of timestamps in the body text. - if PY3: - __str__ = __unicode__ - else: - def __str__(self): - return unicode(self).encode('utf-8') + :type active: bool + :arg active: Include active type timestamps. + :type inactive: bool + :arg inactive: Include inactive type timestamps. + :type range: bool + :arg range: Include timestamps which has end date. + :type point: bool + :arg point: Include timestamps which has no end date. + :rtype: list of :class:`orgparse.date.OrgDate` subclasses -class OrgRootNode(OrgBaseNode): + Consider the following org node: + + >>> from orgparse import loads + >>> node = loads(''' + ... * Node + ... CLOSED: [2012-02-26 Sun 21:15] SCHEDULED: <2012-02-26 Sun> + ... CLOCK: [2012-02-26 Sun 21:10]--[2012-02-26 Sun 21:15] => 0:05 + ... Some inactive timestamp [2012-02-23 Thu] in body text. + ... Some active timestamp <2012-02-24 Fri> in body text. + ... Some inactive time range [2012-02-25 Sat]--[2012-02-27 Mon]. + ... Some active time range <2012-02-26 Sun>--<2012-02-28 Tue>. + ... ''').children[0] + + The default flags are all off, so it does not return anything. + + >>> node.get_timestamps() + [] + + You can fetch appropriate timestamps using keyword arguments. + + >>> node.get_timestamps(inactive=True, point=True) + [OrgDate((2012, 2, 23), None, False)] + >>> node.get_timestamps(active=True, point=True) + [OrgDate((2012, 2, 24))] + >>> node.get_timestamps(inactive=True, range=True) + [OrgDate((2012, 2, 25), (2012, 2, 27), False)] + >>> node.get_timestamps(active=True, range=True) + [OrgDate((2012, 2, 26), (2012, 2, 28))] + + This is more complex example. Only active timestamps, + regardless of range/point type. + + >>> node.get_timestamps(active=True, point=True, range=True) + [OrgDate((2012, 2, 24)), OrgDate((2012, 2, 26), (2012, 2, 28))] + + """ + return [ + ts + for ts in self._timestamps + if ( + ((active and ts.is_active()) or (inactive and not ts.is_active())) + and ((range and ts.has_end()) or (point and not ts.has_end())) + ) + ] + + @property + def datelist(self): + """ + Alias of ``.get_timestamps(active=True, inactive=True, point=True)``. + + :rtype: list of :class:`orgparse.date.OrgDate` subclasses + + >>> from orgparse import loads + >>> root = loads(''' + ... * Node with point dates <2012-02-25 Sat> + ... 
CLOSED: [2012-02-25 Sat 21:15] + ... Some inactive timestamp [2012-02-26 Sun] in body text. + ... Some active timestamp <2012-02-27 Mon> in body text. + ... ''') + >>> root.children[0].datelist # doctest: +NORMALIZE_WHITESPACE + [OrgDate((2012, 2, 25)), + OrgDate((2012, 2, 26), None, False), + OrgDate((2012, 2, 27))] + + """ + return self.get_timestamps(active=True, inactive=True, point=True) + + @property + def rangelist(self): + """ + Alias of ``.get_timestamps(active=True, inactive=True, range=True)``. + + :rtype: list of :class:`orgparse.date.OrgDate` subclasses + + >>> from orgparse import loads + >>> root = loads(''' + ... * Node with range dates <2012-02-25 Sat>--<2012-02-28 Tue> + ... CLOCK: [2012-02-26 Sun 21:10]--[2012-02-26 Sun 21:15] => 0:05 + ... Some inactive time range [2012-02-25 Sat]--[2012-02-27 Mon]. + ... Some active time range <2012-02-26 Sun>--<2012-02-28 Tue>. + ... Some time interval <2012-02-27 Mon 11:23-12:10>. + ... ''') + >>> root.children[0].rangelist # doctest: +NORMALIZE_WHITESPACE + [OrgDate((2012, 2, 25), (2012, 2, 28)), + OrgDate((2012, 2, 25), (2012, 2, 27), False), + OrgDate((2012, 2, 26), (2012, 2, 28)), + OrgDate((2012, 2, 27, 11, 23, 0), (2012, 2, 27, 12, 10, 0))] + + """ + return self.get_timestamps(active=True, inactive=True, range=True) + + def __str__(self) -> str: + return "\n".join(self._lines) + + # todo hmm, not sure if it really belongs here and not to OrgRootNode? + def get_file_property_list(self, property: str): # noqa: A002 + """ + Return a list of the selected property + """ + vals = self._special_comments.get(property.upper(), None) + return [] if vals is None else vals + + def get_file_property(self, property: str): # noqa: A002 + """ + Return a single element of the selected property or None if it doesn't exist + """ + vals = self._special_comments.get(property.upper(), None) + if vals is None: + return None + elif len(vals) == 1: + return vals[0] + else: + raise RuntimeError(f'Multiple values for property {property}: {vals}') + + +class OrgRootNode(OrgBaseNode): """ - Node to represent a file + Node to represent a file. Its body contains all lines before the first + headline See :class:`OrgBaseNode` for other available functions. 
- """ - # getter + @property + def heading(self) -> str: + return '' + + def _get_tags(self, *, inher: bool = False) -> set[str]: # noqa: ARG002 + filetags = self.get_file_property_list('FILETAGS') + return set(filetags) @property - def level(self): + def level(self) -> int: return 0 - def get_parent(self, max_level=None): + def get_parent(self, max_level=None): # noqa: ARG002 return None - # misc - - def is_root(self): + def is_root(self) -> bool: return True + # parsers -class OrgNode(OrgBaseNode): + def _parse_pre(self): + """Call parsers which must be called before tree structuring""" + ilines: Iterator[str] = iter(self._lines) + ilines = self._iparse_properties(ilines) + ilines = self._iparse_timestamps(ilines) + self._body_lines = list(ilines) + def _iparse_timestamps(self, ilines: Iterator[str]) -> Iterator[str]: + self._timestamps = [] + for line in ilines: + self._timestamps.extend(OrgDate.list_from_str(line)) + yield line + + +class OrgNode(OrgBaseNode): """ Node to represent normal org node @@ -743,21 +1128,20 @@ class OrgNode(OrgBaseNode): """ - def __init__(self, *args, **kwds): - super(OrgNode, self).__init__(*args, **kwds) - self._heading = None - self._level = None - self._tags = None - self._todo = None + def __init__(self, *args, **kwds) -> None: + super().__init__(*args, **kwds) + # fixme instead of casts, should organize code in such a way that they aren't necessary + self._heading = cast(str, None) + self._level: int | None = None + self._tags = cast(list[str], None) + self._todo: Optional[str] = None self._priority = None - self._properties = {} - self._scheduled = OrgDate(None) - self._deadline = OrgDate(None) - self._closed = OrgDate(None) - self._timestamps = [] - self._clocklist = [] - self._body_lines = [] - self._repeated_tasks = [] + self._scheduled = OrgDateScheduled(None) + self._deadline = OrgDateDeadline(None) + self._closed = OrgDateClosed(None) + self._clocklist: list[OrgDateClock] = [] + self._body_lines: list[str] = [] + self._repeated_tasks: list[OrgDateRepeatedTask] = [] # parser @@ -765,8 +1149,11 @@ def _parse_pre(self): """Call parsers which must be called before tree structuring""" self._parse_heading() # FIXME: make the following parsers "lazy" - ilines = iter(self._lines) - next(ilines) # skip heading + ilines: Iterator[str] = iter(self._lines) + try: + next(ilines) # skip heading + except StopIteration: + return ilines = self._iparse_sdc(ilines) ilines = self._iparse_clock(ilines) ilines = self._iparse_properties(ilines) @@ -774,12 +1161,13 @@ def _parse_pre(self): ilines = self._iparse_timestamps(ilines) self._body_lines = list(ilines) - def _parse_heading(self): + def _parse_heading(self) -> None: heading = self._lines[0] - (heading, self._level) = parse_heading_level(heading) + heading_level = parse_heading_level(heading) + if heading_level is not None: + (heading, self._level) = heading_level (heading, self._tags) = parse_heading_tags(heading) - (heading, self._todo) = parse_heading_todos( - heading, self.env.all_todo_keys) + (heading, self._todo) = parse_heading_todos(heading, self.env.all_todo_keys) (heading, self._priority) = parse_heading_priority(heading) self._heading = heading @@ -789,60 +1177,43 @@ def _parse_heading(self): # If the item returned by the input iterator must be dedicated to # the parser, do not yield the item or yield it as-is otherwise. - def _iparse_sdc(self, ilines): + def _iparse_sdc(self, ilines: Iterator[str]) -> Iterator[str]: """ Parse SCHEDULED, DEADLINE and CLOSED time tamps. 
They are assumed to be in the first line. """ - line = next(ilines) + try: + line = next(ilines) + except StopIteration: + return (self._scheduled, self._deadline, self._closed) = parse_sdc(line) - if not (self._scheduled or - self._deadline or - self._closed): + if not (self._scheduled or self._deadline or self._closed): yield line # when none of them were found for line in ilines: yield line - def _iparse_clock(self, ilines): - self._clocklist = clocklist = [] + def _iparse_clock(self, ilines: Iterator[str]) -> Iterator[str]: + self._clocklist = [] for line in ilines: cl = OrgDateClock.from_str(line) if cl: - clocklist.append(cl) + self._clocklist.append(cl) else: yield line - def _iparse_timestamps(self, ilines): - self._timestamps = timestamps = [] - timestamps.extend(OrgDate.list_from_str(self._heading)) + def _iparse_timestamps(self, ilines: Iterator[str]) -> Iterator[str]: + self._timestamps = [] + self._timestamps.extend(OrgDate.list_from_str(self._heading)) for l in ilines: - timestamps.extend(OrgDate.list_from_str(l)) + self._timestamps.extend(OrgDate.list_from_str(l)) yield l - def _iparse_properties(self, ilines): - self._properties = properties = {} - in_property_field = False - for line in ilines: - if in_property_field: - if line.find(":END:") >= 0: - break - else: - (key, val) = parse_property(line) - if key: - properties.update({key: val}) - elif line.find(":PROPERTIES:") >= 0: - in_property_field = True - else: - yield line - for line in ilines: - yield line - - def _iparse_repeated_tasks(self, ilines): - self._repeated_tasks = repeated_tasks = [] + def _iparse_repeated_tasks(self, ilines: Iterator[str]) -> Iterator[str]: + self._repeated_tasks = [] for line in ilines: match = self._repeated_tasks_re.search(line) if match: @@ -851,31 +1222,20 @@ done_state = mdict['done'] todo_state = mdict['todo'] date = OrgDate.from_str(mdict['date']) - repeated_tasks.append( - OrgDateRepeatedTask(date.start, todo_state, done_state)) + self._repeated_tasks.append(OrgDateRepeatedTask(date.start, todo_state, done_state)) else: yield line _repeated_tasks_re = re.compile( r''' - \s+ - \s+ + \s*- \s+ State \s+ "(?P<done> [^"]+)" \s+ from \s+ "(?P<todo> [^"]+)" \s+ \[ (?P<date> [^\]]+) \]''', re.VERBOSE) - - # getter + re.VERBOSE, + ) - @staticmethod - def _get_text(text, format='plain'): - if format == 'plain': - return to_plain_text(text) - elif format == 'raw': - return text - else: - raise ValueError('format={0} is not supported.'.format(format)) - - def get_heading(self, format='plain'): + def get_heading(self, format: str = 'plain') -> str: # noqa: A002 """ Return a string of head text without tags and TODO keywords. @@ -897,29 +1257,13 @@ """ return self._get_text(self._heading, format) - def get_body(self, format='plain'): - """ - Return a string of body text. - - See also: :meth:`get_heading`. - - """ - return self._get_text( - '\n'.join(self._body_lines), format) if self._lines else '' - @property - def heading(self): + def heading(self) -> str: """Alias of ``.get_heading(format='plain')``.""" return self.get_heading() - @property - def body(self): - """Alias of ``.get_body(format='plain')``.""" - return self.get_body() - @property def level(self): - return self._level """ Level attribute of this node. Top level node is level 1. >>> root = loads(''' ... * Node 1 ... ** Node 2 ...
''') - >>> (n1, n2) = root.children + >>> (n1, n2) = list(root[1:]) >>> root.level 0 >>> n1.level @@ -937,9 +1281,10 @@ def level(self): 2 """ + return self._level @property - def priority(self): + def priority(self) -> str | None: """ Priority attribute of this node. It is None if undefined. @@ -956,7 +1301,7 @@ def priority(self): """ return self._priority - def _get_tags(self, inher=False): + def _get_tags(self, *, inher: bool = False) -> set[str]: tags = set(self._tags) if inher: parent = self.get_parent() @@ -965,7 +1310,7 @@ def _get_tags(self, inher=False): return tags @property - def todo(self): + def todo(self) -> Optional[str]: """ A TODO keyword of this node if exists or None otherwise. @@ -977,37 +1322,6 @@ def todo(self): """ return self._todo - def get_property(self, key, val=None): - """ - Return property named ``key`` if exists or ``val`` otherwise. - - :arg str key: - Key of property. - - :arg val: - Default value to return. - - """ - return self._properties.get(key, val) - - @property - def properties(self): - """ - Node properties as a dictionary. - - >>> from orgparse import loads - >>> root = loads(''' - ... * Node - ... :PROPERTIES: - ... :SomeProperty: value - ... :END: - ... ''') - >>> root.children[0].properties['SomeProperty'] - 'value' - - """ - return self._properties - @property def scheduled(self): """ @@ -1080,123 +1394,16 @@ def clock(self): """ return self._clocklist - def get_timestamps(self, active=False, inactive=False, - range=False, point=False): - """ - Return a list of timestamps in the body text. - - :type active: bool - :arg active: Include active type timestamps. - :type inactive: bool - :arg inactive: Include inactive type timestamps. - :type range: bool - :arg range: Include timestamps which has end date. - :type point: bool - :arg point: Include timestamps which has no end date. - - :rtype: list of :class:`orgparse.date.OrgDate` subclasses - - - Consider the following org node: - - >>> from orgparse import loads - >>> node = loads(''' - ... * Node - ... CLOSED: [2012-02-26 Sun 21:15] SCHEDULED: <2012-02-26 Sun> - ... CLOCK: [2012-02-26 Sun 21:10]--[2012-02-26 Sun 21:15] => 0:05 - ... Some inactive timestamp [2012-02-23 Thu] in body text. - ... Some active timestamp <2012-02-24 Fri> in body text. - ... Some inactive time range [2012-02-25 Sat]--[2012-02-27 Mon]. - ... Some active time range <2012-02-26 Sun>--<2012-02-28 Tue>. - ... ''').children[0] - - The default flags are all off, so it does not return anything. - - >>> node.get_timestamps() - [] - - You can fetch appropriate timestamps using keyword arguments. - - >>> node.get_timestamps(inactive=True, point=True) - [OrgDate((2012, 2, 23), None, False)] - >>> node.get_timestamps(active=True, point=True) - [OrgDate((2012, 2, 24))] - >>> node.get_timestamps(inactive=True, range=True) - [OrgDate((2012, 2, 25), (2012, 2, 27), False)] - >>> node.get_timestamps(active=True, range=True) - [OrgDate((2012, 2, 26), (2012, 2, 28))] - - This is more complex example. Only active timestamps, - regardless of range/point type. - - >>> node.get_timestamps(active=True, point=True, range=True) - [OrgDate((2012, 2, 24)), OrgDate((2012, 2, 26), (2012, 2, 28))] - - """ - return [ - ts for ts in self._timestamps if - (((active and ts.is_active()) or - (inactive and not ts.is_active())) and - ((range and ts.has_end()) or - (point and not ts.has_end())))] - - @property - def datelist(self): - """ - Alias of ``.get_timestamps(active=True, inactive=True, point=True)``. 
- - :rtype: list of :class:`orgparse.date.OrgDate` subclasses - - >>> from orgparse import loads - >>> root = loads(''' - ... * Node with point dates <2012-02-25 Sat> - ... CLOSED: [2012-02-25 Sat 21:15] - ... Some inactive timestamp [2012-02-26 Sun] in body text. - ... Some active timestamp <2012-02-27 Mon> in body text. - ... ''') - >>> root.children[0].datelist # doctest: +NORMALIZE_WHITESPACE - [OrgDate((2012, 2, 25)), - OrgDate((2012, 2, 26), None, False), - OrgDate((2012, 2, 27))] - - """ - return self.get_timestamps(active=True, inactive=True, point=True) - - @property - def rangelist(self): - """ - Alias of ``.get_timestamps(active=True, inactive=True, range=True)``. - - :rtype: list of :class:`orgparse.date.OrgDate` subclasses - - >>> from orgparse import loads - >>> root = loads(''' - ... * Node with range dates <2012-02-25 Sat>--<2012-02-28 Tue> - ... CLOCK: [2012-02-26 Sun 21:10]--[2012-02-26 Sun 21:15] => 0:05 - ... Some inactive time range [2012-02-25 Sat]--[2012-02-27 Mon]. - ... Some active time range <2012-02-26 Sun>--<2012-02-28 Tue>. - ... ''') - >>> root.children[0].rangelist # doctest: +NORMALIZE_WHITESPACE - [OrgDate((2012, 2, 25), (2012, 2, 28)), - OrgDate((2012, 2, 25), (2012, 2, 27), False), - OrgDate((2012, 2, 26), (2012, 2, 28))] - - """ - return self.get_timestamps(active=True, inactive=True, range=True) - def has_date(self): """ Return ``True`` if it has any kind of timestamp """ - return (self.scheduled or - self.deadline or - self.datelist or - self.rangelist) + return self.scheduled or self.deadline or self.datelist or self.rangelist @property def repeated_tasks(self): """ - Get repeated tasks marked DONE in a entry having repeater. + Get repeated tasks marked DONE in an entry having repeater. :rtype: list of :class:`orgparse.date.OrgDateRepeatedTask` @@ -1240,13 +1447,26 @@ def repeated_tasks(self): return self._repeated_tasks -def parse_lines(lines, filename): - env = OrgEnv(filename=filename) +def parse_lines(lines: Iterable[str], filename, env=None) -> OrgNode: + if not env: + env = OrgEnv(filename=filename) + elif env.filename != filename: + raise ValueError('If env is specified, filename must match') + # parse into node of list (environment will be parsed) - nodelist = list(env.from_chunks(lines_to_chunks(lines))) + ch1, ch2 = itertools.tee(lines_to_chunks(lines)) + linenos = itertools.accumulate(itertools.chain([0], (len(c) for c in ch1))) + nodes = env.from_chunks(ch2) + nodelist = [] + for lineno, node in zip(linenos, nodes): + lineno += 1 # in text editors lines are 1-indexed + node.linenumber = lineno + nodelist.append(node) # parse headings (level, TODO, TAGs, and heading) nodelist[0]._index = 0 - for (i, node) in enumerate(nodelist[1:], 1): # nodes except root node + # parse the root node + nodelist[0]._parse_pre() + for i, node in enumerate(nodelist[1:], 1): # nodes except root node node._index = i node._parse_pre() env._nodes = nodelist diff --git a/orgparse/tests/__init__.py b/src/orgparse/py.typed similarity index 100% rename from orgparse/tests/__init__.py rename to src/orgparse/py.typed diff --git a/orgparse/tests/data/__init__.py b/src/orgparse/tests/__init__.py similarity index 100% rename from orgparse/tests/data/__init__.py rename to src/orgparse/tests/__init__.py diff --git a/orgparse/tests/data/00_simple.org b/src/orgparse/tests/data/00_simple.org similarity index 100% rename from orgparse/tests/data/00_simple.org rename to src/orgparse/tests/data/00_simple.org diff --git a/src/orgparse/tests/data/00_simple.py 
b/src/orgparse/tests/data/00_simple.py new file mode 100644 index 0000000..23ad86c --- /dev/null +++ b/src/orgparse/tests/data/00_simple.py @@ -0,0 +1,38 @@ +from typing import Any + + +def nodedict(i, level, todo=None, shallow_tags=None, tags=None) -> dict[str, Any]: + if tags is None: + tags = set() + if shallow_tags is None: + shallow_tags = set() + return { + "heading": f"Heading {i}", + "level": level, + "todo": todo, + "shallow_tags": shallow_tags, + "tags": tags, + } + + +def tags(nums) -> set[str]: + return set(map('TAG{0}'.format, nums)) + + +data = [ + nodedict(i, *vals) for (i, vals) in enumerate([ # type: ignore[misc] + [1, 'TODO1', tags([1]) , tags(range(1, 2))], + [2, 'TODO2', tags([2]) , tags(range(1, 3))], + [3, 'TODO3', tags([3]) , tags(range(1, 4))], + [4, 'TODO4', tags([4]) , tags(range(1, 5))], + [2, None , tags([]) , tags([1]) ], + [2, None , tags([]) , tags([1]) ], + [1, None , tags([2]) , tags([2]) ], + [2, None , tags([2]) , tags([2]) ], + [3, None , tags([]) , tags([2]) ], + [5, None , tags([3, 4]), tags([2, 3, 4]) ], + [4, None , tags([1]) , tags([1, 2]) ], + [2, None , tags([]) , tags([2]) ], + [1], + ]) +] # fmt: skip diff --git a/orgparse/tests/data/01_attributes.org b/src/orgparse/tests/data/01_attributes.org similarity index 82% rename from orgparse/tests/data/01_attributes.org rename to src/orgparse/tests/data/01_attributes.org index 9d737d9..99e202b 100644 --- a/orgparse/tests/data/01_attributes.org +++ b/src/orgparse/tests/data/01_attributes.org @@ -9,6 +9,7 @@ - <2010-08-16 Mon> DateList - <2010-08-07 Sat>--<2010-08-08 Sun> - <2010-08-09 Mon 00:30>--<2010-08-10 Tue 13:20> RangeList + - <2019-08-10 Sat 16:30-17:30> TimeRange * A node without any attributed * DONE [#A] A node with a lot of attributes SCHEDULED: <2010-08-06 Fri> DEADLINE: <2010-08-10 Tue> CLOSED: [2010-08-08 Sun 18:00] @@ -20,3 +21,9 @@ - <2010-08-16 Mon> DateList - <2010-08-07 Sat>--<2010-08-08 Sun> - <2010-08-09 Mon 00:30>--<2010-08-10 Tue 13:20> RangeList + - <2019-08-10 Sat 16:30-17:30> TimeRange +* range in deadline +DEADLINE: <2019-09-06 Fri 10:00--11:20> + body +* node with a second line but no date +body diff --git a/src/orgparse/tests/data/01_attributes.py b/src/orgparse/tests/data/01_attributes.py new file mode 100644 index 0000000..467df02 --- /dev/null +++ b/src/orgparse/tests/data/01_attributes.py @@ -0,0 +1,76 @@ +from typing import Any + +from orgparse.date import ( + OrgDate, + OrgDateClock, + OrgDateClosed, + OrgDateDeadline, + OrgDateScheduled, +) + +Raw = dict[str, Any] + +node1: Raw = { + "heading": "A node with a lot of attributes", + "priority": 'A', + "scheduled": OrgDateScheduled((2010, 8, 6)), + "deadline": OrgDateDeadline((2010, 8, 10)), + "closed": OrgDateClosed((2010, 8, 8, 18, 0)), + "clock": [ + OrgDateClock((2010, 8, 8, 17, 40), (2010, 8, 8, 17, 50), 10), + OrgDateClock((2010, 8, 8, 17, 00), (2010, 8, 8, 17, 30), 30), + ], + "properties": {"Effort": 70}, + "datelist": [OrgDate((2010, 8, 16))], + "rangelist": [ + OrgDate((2010, 8, 7), (2010, 8, 8)), + OrgDate((2010, 8, 9, 0, 30), (2010, 8, 10, 13, 20)), + OrgDate((2019, 8, 10, 16, 30, 0), (2019, 8, 10, 17, 30, 0)), + ], + "body": """\ + - <2010-08-16 Mon> DateList + - <2010-08-07 Sat>--<2010-08-08 Sun> + - <2010-08-09 Mon 00:30>--<2010-08-10 Tue 13:20> RangeList + - <2019-08-10 Sat 16:30-17:30> TimeRange""", +} + +node2: Raw = { + "heading": "A node without any attributed", + "priority": None, + "scheduled": OrgDateScheduled(None), + "deadline": OrgDateDeadline(None), + "closed": OrgDateClosed(None), + "clock": 
[], + "properties": {}, + "datelist": [], + "rangelist": [], + "body": "", +} + +node3: Raw = { + "heading": "range in deadline", + "priority": None, + "scheduled": OrgDateScheduled(None), + "deadline": OrgDateDeadline((2019, 9, 6, 10, 0), (2019, 9, 6, 11, 20)), + "closed": OrgDateClosed(None), + "clock": [], + "properties": {}, + "datelist": [], + "rangelist": [], + "body": " body", +} + +node4: Raw = { + "heading": "node with a second line but no date", + "priority": None, + "scheduled": OrgDateScheduled(None), + "deadline": OrgDateDeadline(None), + "closed": OrgDateClosed(None), + "clock": [], + "properties": {}, + "datelist": [], + "rangelist": [], + "body": "body", +} + +data = [node1, node2, node1, node3, node4] diff --git a/orgparse/tests/data/02_tree_struct.org b/src/orgparse/tests/data/02_tree_struct.org similarity index 100% rename from orgparse/tests/data/02_tree_struct.org rename to src/orgparse/tests/data/02_tree_struct.org diff --git a/orgparse/tests/data/02_tree_struct.py b/src/orgparse/tests/data/02_tree_struct.py similarity index 66% rename from orgparse/tests/data/02_tree_struct.py rename to src/orgparse/tests/data/02_tree_struct.py index 17e876e..86b6314 100644 --- a/orgparse/tests/data/02_tree_struct.py +++ b/src/orgparse/tests/data/02_tree_struct.py @@ -1,8 +1,15 @@ -def nodedict(parent, children=[], previous=None, next=None): - return dict(parent_heading=parent, - children_heading=children, - previous_same_level_heading=previous, - next_same_level_heading=next) +from typing import Any + + +def nodedict(parent, children=None, previous=None, next_=None) -> dict[str, Any]: + if children is None: + children = [] + return { + 'parent_heading': parent, + 'children_heading': children, + 'previous_same_level_heading': previous, + 'next_same_level_heading': next_, + } data = [nodedict(*args) for args in [ @@ -36,4 +43,6 @@ def nodedict(parent, children=[], previous=None, next=None): ('G6-H2',), ('G6-H2',), ('G6-H1', [], 'G6-H2'), -]] + # G7 + (None, [], 'G6-H1'), +]] # fmt: skip diff --git a/orgparse/tests/data/03_repeated_tasks.org b/src/orgparse/tests/data/03_repeated_tasks.org similarity index 100% rename from orgparse/tests/data/03_repeated_tasks.org rename to src/orgparse/tests/data/03_repeated_tasks.org diff --git a/orgparse/tests/data/03_repeated_tasks.py b/src/orgparse/tests/data/03_repeated_tasks.py similarity index 50% rename from orgparse/tests/data/03_repeated_tasks.py rename to src/orgparse/tests/data/03_repeated_tasks.py index 18cfe12..17336e0 100644 --- a/orgparse/tests/data/03_repeated_tasks.py +++ b/src/orgparse/tests/data/03_repeated_tasks.py @@ -1,13 +1,12 @@ -from orgparse.date import OrgDateRepeatedTask, OrgDateDeadline +from orgparse.date import OrgDateDeadline, OrgDateRepeatedTask - -data = [dict( - heading='Pay the rent', - todo='TODO', - deadline=OrgDateDeadline((2005, 10, 1)), - repeated_tasks=[ +data = [{ + 'heading': 'Pay the rent', + 'todo': 'TODO', + 'deadline': OrgDateDeadline((2005, 10, 1)), + 'repeated_tasks': [ OrgDateRepeatedTask((2005, 9, 1, 16, 10, 0), 'TODO', 'DONE'), OrgDateRepeatedTask((2005, 8, 1, 19, 44, 0), 'TODO', 'DONE'), OrgDateRepeatedTask((2005, 7, 1, 17, 27, 0), 'TODO', 'DONE'), ] -)] +}] # fmt: skip diff --git a/orgparse/tests/data/04_logbook.org b/src/orgparse/tests/data/04_logbook.org similarity index 88% rename from orgparse/tests/data/04_logbook.org rename to src/orgparse/tests/data/04_logbook.org index 9155470..e89ec26 100644 --- a/orgparse/tests/data/04_logbook.org +++ b/src/orgparse/tests/data/04_logbook.org @@ -1,5 +1,6 
@@ * LOGBOOK drawer test :LOGBOOK: + CLOCK: [2012-10-26 Fri 16:01] CLOCK: [2012-10-26 Fri 14:50]--[2012-10-26 Fri 15:00] => 0:10 CLOCK: [2012-10-26 Fri 14:30]--[2012-10-26 Fri 14:40] => 0:10 CLOCK: [2012-10-26 Fri 14:10]--[2012-10-26 Fri 14:20] => 0:10 diff --git a/orgparse/tests/data/04_logbook.py b/src/orgparse/tests/data/04_logbook.py similarity index 67% rename from orgparse/tests/data/04_logbook.py rename to src/orgparse/tests/data/04_logbook.py index 4c05a15..2443683 100644 --- a/orgparse/tests/data/04_logbook.py +++ b/src/orgparse/tests/data/04_logbook.py @@ -1,10 +1,11 @@ from orgparse.date import OrgDateClock -data = [dict( - heading='LOGBOOK drawer test', - clock=[ +data = [{ + 'heading': 'LOGBOOK drawer test', + 'clock': [ + OrgDateClock((2012, 10, 26, 16, 1)), OrgDateClock((2012, 10, 26, 14, 50), (2012, 10, 26, 15, 00)), OrgDateClock((2012, 10, 26, 14, 30), (2012, 10, 26, 14, 40)), OrgDateClock((2012, 10, 26, 14, 10), (2012, 10, 26, 14, 20)), ] -)] +}] # fmt: skip diff --git a/orgparse/tests/data/05_tags.org b/src/orgparse/tests/data/05_tags.org similarity index 88% rename from orgparse/tests/data/05_tags.org rename to src/orgparse/tests/data/05_tags.org index 9e6ca2a..651d7e0 100644 --- a/orgparse/tests/data/05_tags.org +++ b/src/orgparse/tests/data/05_tags.org @@ -6,3 +6,4 @@ * Node 5 :@_: * Node 6 :_tag_: * Heading: :with:colon: :tag: +* unicode :ёж:tag:háček: diff --git a/orgparse/tests/data/05_tags.py b/src/orgparse/tests/data/05_tags.py similarity index 50% rename from orgparse/tests/data/05_tags.py rename to src/orgparse/tests/data/05_tags.py index 85a4db2..19447f4 100644 --- a/orgparse/tests/data/05_tags.py +++ b/src/orgparse/tests/data/05_tags.py @@ -1,8 +1,8 @@ def nodedict(i, tags): - return dict( - heading="Node {0}".format(i), - tags=set(tags), - ) + return { + "heading": f"Node {i}", + "tags": set(tags), + } data = [ @@ -15,5 +15,7 @@ def nodedict(i, tags): [["@_"]], [["_tag_"]], ])] + [ - dict(heading='Heading: :with:colon:', tags=set(["tag"])), - ] + {"heading": 'Heading: :with:colon:', "tags": {"tag"}}, + ] + [ + {"heading": 'unicode', "tags": {'ёж', 'tag', 'háček'}}, + ] # fmt: skip diff --git a/orgparse/utils/__init__.py b/src/orgparse/tests/data/__init__.py similarity index 100% rename from orgparse/utils/__init__.py rename to src/orgparse/tests/data/__init__.py diff --git a/src/orgparse/tests/test_data.py b/src/orgparse/tests/test_data.py new file mode 100644 index 0000000..c271273 --- /dev/null +++ b/src/orgparse/tests/test_data.py @@ -0,0 +1,157 @@ +import pickle +from collections.abc import Iterator +from pathlib import Path + +import pytest + +from .. import load, loads + +DATADIR = Path(__file__).parent / 'data' + + +def load_data(path: Path): + """Load data from python file""" + ns = {} # type: ignore[var-annotated] + # read_bytes() and compile hackery to avoid encoding issues (e.g. see 05_tags) + exec(compile(path.read_bytes(), path, 'exec'), ns) + return ns['data'] + + +def value_from_data_key(node, key): + """ + Helper function for check_data. Get value from Orgnode by key. 
+ """ + if key == 'tags_inher': + return node.tags + elif key == 'children_heading': + return [c.heading for c in node.children] + elif key in ( + 'parent_heading', + 'previous_same_level_heading', + 'next_same_level_heading', + ): + othernode = getattr(node, key.rsplit('_', 1)[0]) + if othernode and not othernode.is_root(): + return othernode.heading + else: + return None + else: + return getattr(node, key) + + +def data_path(dataname: str, ext: str) -> Path: + return DATADIR / f'{dataname}.{ext}' + + +def get_datanames() -> Iterator[str]: + for oname in sorted(DATADIR.glob('*.org')): + yield oname.stem + + +@pytest.mark.parametrize('dataname', get_datanames()) +def test_data(dataname): + """ + Compare parsed data from 'data/*.org' and its correct answer 'data/*.py' + """ + oname = data_path(dataname, "org") + data = load_data(data_path(dataname, "py")) + root = load(oname) + + for i, (node, kwds) in enumerate(zip(root[1:], data)): + for key in kwds: + val = value_from_data_key(node, key) + assert kwds[key] == val, ( + f'check value of {i}-th node of key "{key}" from "{dataname}".\n\nParsed:\n{val}\n\nReal:\n{kwds[key]}' + ) + assert type(kwds[key]) == type(val), ( # noqa: E721 + f'check type of {i}-th node of key "{key}" from "{dataname}".\n\nParsed:\n{type(val)}\n\nReal:\n{type(kwds[key])}' + ) + + assert root.env.filename == str(oname) + + +@pytest.mark.parametrize('dataname', get_datanames()) +def test_picklable(dataname): + oname = data_path(dataname, "org") + root = load(oname) + pickle.dumps(root) + + +def test_iter_node(): + root = loads(""" +* H1 +** H2 +*** H3 +* H4 +** H5 +""") + node = root[1] + assert node.heading == 'H1' + + by_iter = [n.heading for n in node] + assert by_iter == ['H1', 'H2', 'H3'] + + +def test_commented_headings_do_not_appear_as_children(): + root = loads("""\ +* H1 +#** H2 +** H3 +#* H4 +#** H5 +* H6 +""") + assert root.linenumber == 1 + top_level = root.children + assert len(top_level) == 2 + + h1 = top_level[0] + assert h1.heading == "H1" + assert h1.get_body() == "#** H2" + assert h1.linenumber == 1 + + [h3] = h1.children + assert h3.heading == "H3" + assert h3.get_body() == "#* H4\n#** H5" + assert h3.linenumber == 3 + + h6 = top_level[1] + assert h6.heading == "H6" + assert len(h6.children) == 0 + assert h6.linenumber == 6 + + +def test_commented_clock_entries_are_ignored_by_node_clock(): + root = loads("""\ +* Heading +# * Floss +# SCHEDULED: <2019-06-22 Sat 08:30 .+1w> +# :LOGBOOK: +# CLOCK: [2019-06-04 Tue 16:00]--[2019-06-04 Tue 17:00] => 1:00 +# :END: +""") + [node] = root.children[0] + assert node.heading == "Heading" + assert node.clock == [] + + +def test_commented_scheduled_marker_is_ignored_by_node_scheduled(): + root = loads("""\ +* Heading +# SCHEDULED: <2019-06-22 Sat 08:30 .+1w> +""") + [node] = root.children[0] + assert node.heading == "Heading" + assert node.scheduled.start is None + + +def test_commented_property_is_ignored_by_node_get_property(): + root = loads("""\ +* Heading +# :PROPERTIES: +# :PROPER-TEA: backup +# :END: +""") + [node] = root.children[0] + assert node.heading == "Heading" + assert node.get_property("PROPER-TEA") is None diff --git a/src/orgparse/tests/test_date.py b/src/orgparse/tests/test_date.py new file mode 100644 index 0000000..9764f97 --- /dev/null +++ b/src/orgparse/tests/test_date.py @@ -0,0 +1,48 @@ +import datetime + +from orgparse.date import ( + OrgDate, + OrgDateClock, + OrgDateClosed, + OrgDateDeadline, + OrgDateScheduled, +) + + +def test_date_as_string() -> None: + testdate = datetime.date(2021, 
+def test_date_as_string() -> None:
+    testdate = datetime.date(2021, 9, 3)
+    testdate2 = datetime.date(2021, 9, 5)
+    testdatetime = datetime.datetime(2021, 9, 3, 16, 19, 13)
+    testdatetime2 = datetime.datetime(2021, 9, 3, 17, 0, 1)
+    testdatetime_nextday = datetime.datetime(2021, 9, 4, 0, 2, 1)
+
+    assert str(OrgDate(testdate)) == "<2021-09-03 Fri>"
+    assert str(OrgDate(testdatetime)) == "<2021-09-03 Fri 16:19>"
+    assert str(OrgDate(testdate, active=False)) == "[2021-09-03 Fri]"
+    assert str(OrgDate(testdatetime, active=False)) == "[2021-09-03 Fri 16:19]"
+
+    assert str(OrgDate(testdate, testdate2)) == "<2021-09-03 Fri>--<2021-09-05 Sun>"
+    assert str(OrgDate(testdatetime, testdatetime2)) == "<2021-09-03 Fri 16:19--17:00>"
+    assert str(OrgDate(testdate, testdate2, active=False)) == "[2021-09-03 Fri]--[2021-09-05 Sun]"
+    assert str(OrgDate(testdatetime, testdatetime2, active=False)) == "[2021-09-03 Fri 16:19--17:00]"
+
+    assert str(OrgDateScheduled(testdate)) == "<2021-09-03 Fri>"
+    assert str(OrgDateScheduled(testdatetime)) == "<2021-09-03 Fri 16:19>"
+    assert str(OrgDateDeadline(testdate)) == "<2021-09-03 Fri>"
+    assert str(OrgDateDeadline(testdatetime)) == "<2021-09-03 Fri 16:19>"
+    assert str(OrgDateClosed(testdate)) == "[2021-09-03 Fri]"
+    assert str(OrgDateClosed(testdatetime)) == "[2021-09-03 Fri 16:19]"
+
+    assert str(OrgDateClock(testdatetime, testdatetime2)) == "[2021-09-03 Fri 16:19]--[2021-09-03 Fri 17:00]"
+    assert str(OrgDateClock(testdatetime, testdatetime_nextday)) == "[2021-09-03 Fri 16:19]--[2021-09-04 Sat 00:02]"
+    assert str(OrgDateClock(testdatetime)) == "[2021-09-03 Fri 16:19]"
+
+
+def test_date_as_datetime() -> None:
+    testdate = (2021, 9, 3)
+    testdatetime = (2021, 9, 3, 16, 19, 13)
+
+    assert OrgDate._as_datetime(datetime.date(*testdate)) == datetime.datetime(*testdate, 0, 0, 0)
+    assert OrgDate._as_datetime(datetime.datetime(*testdatetime)) == datetime.datetime(*testdatetime)
diff --git a/src/orgparse/tests/test_hugedata.py b/src/orgparse/tests/test_hugedata.py
new file mode 100644
index 0000000..b97a178
--- /dev/null
+++ b/src/orgparse/tests/test_hugedata.py
@@ -0,0 +1,26 @@
+import pickle
+
+from .. import loadi
+
+
+def generate_org_lines(num_top_nodes, depth=3, nodes_per_level=1, _level=1):
+    # recursively yield org heading lines: num_top_nodes headings at this
+    # level, each followed by nodes_per_level subtrees, down to `depth` levels
+    if depth == 0:
+        return
+    for i in range(num_top_nodes):
+        yield ("*" * _level) + f' {i}-th heading of level {_level}'
+        yield from generate_org_lines(nodes_per_level, depth - 1, nodes_per_level, _level + 1)
+
+
+def num_generate_org_lines(num_top_nodes, depth=3, nodes_per_level=1):
+    # number of lines generate_org_lines yields for the same arguments
+    if depth == 0:
+        return 0
+    return num_top_nodes * (1 + num_generate_org_lines(nodes_per_level, depth - 1, nodes_per_level))
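+
+
+# For reference (hypothetical call, not executed by the tests):
+# generate_org_lines(2, depth=2) yields
+#   * 0-th heading of level 1
+#   ** 0-th heading of level 2
+#   * 1-th heading of level 1
+#   ** 1-th heading of level 2
+# and num_generate_org_lines(2, depth=2) == 4 matches that count.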
+
+
+def test_picklable() -> None:
+    num = 1000
+    depth = 3
+    nodes_per_level = 1
+    root = loadi(generate_org_lines(num, depth, nodes_per_level))
+    # the +1 accounts for the root node itself
+    assert sum(1 for _ in root) == num_generate_org_lines(num, depth, nodes_per_level) + 1
+    pickle.dumps(root)  # should not fail
diff --git a/src/orgparse/tests/test_misc.py b/src/orgparse/tests/test_misc.py
new file mode 100644
index 0000000..bb1382e
--- /dev/null
+++ b/src/orgparse/tests/test_misc.py
@@ -0,0 +1,304 @@
+import io
+
+import pytest
+
+from orgparse.date import OrgDate
+
+from .. import load, loads
+from ..node import OrgEnv
+
+
+def test_empty_heading() -> None:
+    root = loads('''
+* TODO :sometag:
+  has no heading but still a todo?
+  it's a bit unclear, but seems to be highlighted by emacs..
+''')
+    [h] = root.children
+    assert h.todo == 'TODO'
+    assert h.heading == ''
+    assert h.tags == {'sometag'}
+
+
+def test_root() -> None:
+    root = loads(
+        '''
+#+STARTUP: hidestars
+Whatever
+# comment
+* heading 1
+        '''.strip()
+    )
+    assert len(root.children) == 1
+    # todo not sure if should strip special comments??
+    assert root.body.endswith('Whatever\n# comment')
+    assert root.heading == ''
+
+
+def test_stars():
+    # https://github.com/karlicoss/orgparse/issues/7#issuecomment-533732660
+    root = loads("""
+* Heading with text (A)
+
+The following line is not a heading, because it begins with a
+star but has no spaces afterward, just a newline:
+
+*
+
+** Subheading with text (A1)
+
+*this_is_just*
+
+ *some_bold_text*
+
+This subheading is a child of (A).
+
+The next heading has no text, but it does have a space after
+the star, so it's a heading:
+
+* 
+
+This text is under the "anonymous" heading above, which would be (B).
+
+** Subheading with text (B1)
+
+This subheading is a child of the "anonymous" heading (B), not of heading (A).
+    """)  # noqa: W291
+    [h1, h2] = root.children
+    assert h1.heading == 'Heading with text (A)'
+    assert h2.heading == ''
+
+
+def test_parse_custom_todo_keys():
+    todo_keys = ['TODO', 'CUSTOM1', 'ANOTHER_KEYWORD']
+    done_keys = ['DONE', 'A']
+    filename = '<string>'  # default for loads
+    content = """
+* TODO Heading with a default todo keyword
+
+* DONE Heading with a default done keyword
+
+* CUSTOM1 Heading with a custom todo keyword
+
+* ANOTHER_KEYWORD Heading with a long custom todo keyword
+
+* A Heading with a short custom done keyword
+    """
+
+    env = OrgEnv(todos=todo_keys, dones=done_keys, filename=filename)
+    root = loads(content, env=env)
+
+    assert root.env.all_todo_keys == ['TODO', 'CUSTOM1', 'ANOTHER_KEYWORD', 'DONE', 'A']
+    assert len(root.children) == 5
+    assert root.children[0].todo == 'TODO'
+    assert root.children[1].todo == 'DONE'
+    assert root.children[2].todo == 'CUSTOM1'
+    assert root.children[3].todo == 'ANOTHER_KEYWORD'
+    assert root.children[4].todo == 'A'
+
+
+def test_add_custom_todo_keys():
+    todo_keys = ['CUSTOM_TODO']
+    done_keys = ['CUSTOM_DONE']
+    filename = '<string>'  # default for loads
+    content = """#+TODO: COMMENT_TODO | COMMENT_DONE
+    """
+
+    env = OrgEnv(filename=filename)
+    env.add_todo_keys(todos=todo_keys, dones=done_keys)
+
+    # check that only the custom keys are known before parsing
+    assert env.all_todo_keys == ['CUSTOM_TODO', 'CUSTOM_DONE']
+
+    # after parsing, all keys are set
+    root = loads(content, filename, env)
+    assert root.env.all_todo_keys == ['CUSTOM_TODO', 'COMMENT_TODO', 'CUSTOM_DONE', 'COMMENT_DONE']
+
+
+def test_get_file_property() -> None:
+    content = """#+TITLE: Test: title
+    * Node 1
+    test 1
+    * Node 2
+    test 2
+    """
+
+    root = loads(content)
+    assert root.get_file_property('Nosuchproperty') is None
+    assert root.get_file_property_list('TITLE') == ['Test: title']
+    # also it's case insensitive
+    assert root.get_file_property('title') == 'Test: title'
+    assert root.get_file_property_list('Nosuchproperty') == []
+
+
+def test_get_file_property_multivalued() -> None:
+    content = """ #+TITLE: Test
+    #+OTHER: Test title
+    #+title: alternate title
+
+    * Node 1
+    test 1
+    * Node 2
+    test 2
+    """
+
+    root = loads(content)
+
+    assert root.get_file_property_list('TITLE') == ['Test', 'alternate title']
+    with pytest.raises(RuntimeError):
+        # raises because there are multiple of them
+        root.get_file_property('TITLE')
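+
+
+# Contract sketch for file properties, summarising what the two tests above
+# exercise (not new behaviour): get_file_property returns the single value,
+# or raises RuntimeError when the key occurs several times;
+# get_file_property_list always returns a list, [] for absent keys.
+# Lookups are case insensitive either way.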
+
+
+def test_filetags_are_tags() -> None:
+    content = '''
+#+FILETAGS: :f1:f2:
+
+* heading :h1:
+** child :f2:
+    '''.strip()
+    root = loads(content)
+    assert root.tags == {'f1', 'f2'}
+    child = root.children[0].children[0]
+    assert child.tags == {'f1', 'f2', 'h1'}
+
+
+def test_load_filelike() -> None:
+    stream = io.StringIO('''
+* heading1
+* heading 2
+''')
+    root = load(stream)
+    assert len(root.children) == 2
+    assert root.env.filename == '<file-like>'
+
+
+def test_level_0_properties() -> None:
+    content = '''
+foo bar
+
+:PROPERTIES:
+:PROP-FOO: Bar
+:PROP-BAR: Bar bar
+:END:
+
+* heading :h1:
+:PROPERTIES:
+:HEADING-PROP: foo
+:END:
+** child :f2:
+    '''.strip()
+    root = loads(content)
+    assert root.get_property('PROP-FOO') == 'Bar'
+    assert root.get_property('PROP-BAR') == 'Bar bar'
+    assert root.get_property('PROP-INVALID') is None
+    assert root.get_property('HEADING-PROP') is None
+    assert root.children[0].get_property('HEADING-PROP') == 'foo'
+
+
+def test_level_0_timestamps() -> None:
+    content = '''
+foo bar
+
+ - <2010-08-16 Mon> DateList
+ - <2010-08-07 Sat>--<2010-08-08 Sun>
+ - <2010-08-09 Mon 00:30>--<2010-08-10 Tue 13:20> RangeList
+ - <2019-08-10 Sat 16:30-17:30> TimeRange
+
+* heading :h1:
+** child :f2:
+    '''.strip()
+    root = loads(content)
+    assert root.datelist == [OrgDate((2010, 8, 16))]
+    assert root.rangelist == [
+        OrgDate((2010, 8, 7), (2010, 8, 8)),
+        OrgDate((2010, 8, 9, 0, 30), (2010, 8, 10, 13, 20)),
+        OrgDate((2019, 8, 10, 16, 30, 0), (2019, 8, 10, 17, 30, 0)),
+    ]
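+
+
+# Timestamp cookies, as exercised below: a repeater (+1m, ++1w, .+1d) and an
+# optional warning delay (-3d); OrgDate's repr surfaces them as trailing
+# tuples, e.g. ('+', 1, 'm') and ('-', 3, 'd').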
+
+
+def test_date_with_cookies() -> None:
+    testcases = [
+        ('<2010-06-21 Mon +1y>',
+         "OrgDate((2010, 6, 21), None, True, ('+', 1, 'y'))"),
+        ('<2005-10-01 Sat +1m>',
+         "OrgDate((2005, 10, 1), None, True, ('+', 1, 'm'))"),
+        ('<2005-10-01 Sat +1m -3d>',
+         "OrgDate((2005, 10, 1), None, True, ('+', 1, 'm'), ('-', 3, 'd'))"),
+        ('<2005-10-01 Sat -3d>',
+         "OrgDate((2005, 10, 1), None, True, None, ('-', 3, 'd'))"),
+        ('<2008-02-10 Sun ++1w>',
+         "OrgDate((2008, 2, 10), None, True, ('++', 1, 'w'))"),
+        ('<2008-02-08 Fri 20:00 ++1d>',
+         "OrgDate((2008, 2, 8, 20, 0, 0), None, True, ('++', 1, 'd'))"),
+        ('<2019-04-05 Fri 08:00 .+1h>',
+         "OrgDate((2019, 4, 5, 8, 0, 0), None, True, ('.+', 1, 'h'))"),
+        ('[2019-04-05 Fri 08:00 .+1h]',
+         "OrgDate((2019, 4, 5, 8, 0, 0), None, False, ('.+', 1, 'h'))"),
+        ('<2007-05-16 Wed 12:30 +1w>',
+         "OrgDate((2007, 5, 16, 12, 30, 0), None, True, ('+', 1, 'w'))"),
+    ]  # fmt: skip
+    for inp, expected in testcases:
+        root = loads(inp)
+        output = root[0].datelist[0]
+        assert str(output) == inp
+        assert repr(output) == expected
+    testcases = [
+        ('<2006-11-02 Thu 20:00-22:00 +1w>',
+         "OrgDate((2006, 11, 2, 20, 0, 0), (2006, 11, 2, 22, 0, 0), True, ('+', 1, 'w'))"),
+        ('<2006-11-02 Thu 20:00--22:00 +1w>',
+         "OrgDate((2006, 11, 2, 20, 0, 0), (2006, 11, 2, 22, 0, 0), True, ('+', 1, 'w'))"),
+    ]  # fmt: skip
+    for inp, expected in testcases:
+        root = loads(inp)
+        output = root[0].rangelist[0]
+        assert str(output) == "<2006-11-02 Thu 20:00--22:00 +1w>"
+        assert repr(output) == expected
+    # DEADLINE and SCHEDULED
+    testcases2 = [
+        ('* TODO Pay the rent\nDEADLINE: <2005-10-01 Sat +1m>',
+         "<2005-10-01 Sat +1m>",
+         "OrgDateDeadline((2005, 10, 1), None, True, ('+', 1, 'm'))"),
+        ('* TODO Pay the rent\nDEADLINE: <2005-10-01 Sat +1m -3d>',
+         "<2005-10-01 Sat +1m -3d>",
+         "OrgDateDeadline((2005, 10, 1), None, True, ('+', 1, 'm'), ('-', 3, 'd'))"),
+        ('* TODO Pay the rent\nDEADLINE: <2005-10-01 Sat -3d>',
+         "<2005-10-01 Sat -3d>",
+         "OrgDateDeadline((2005, 10, 1), None, True, None, ('-', 3, 'd'))"),
+        ('* TODO Pay the rent\nDEADLINE: <2005-10-01 Sat ++1m>',
+         "<2005-10-01 Sat ++1m>",
+         "OrgDateDeadline((2005, 10, 1), None, True, ('++', 1, 'm'))"),
+        ('* TODO Pay the rent\nDEADLINE: <2005-10-01 Sat .+1m>',
+         "<2005-10-01 Sat .+1m>",
+         "OrgDateDeadline((2005, 10, 1), None, True, ('.+', 1, 'm'))"),
+    ]  # fmt: skip
+    for inp, expected_str, expected_repr in testcases2:
+        root = loads(inp)
+        output = root[1].deadline
+        assert str(output) == expected_str
+        assert repr(output) == expected_repr
+    testcases2 = [
+        ('* TODO Pay the rent\nSCHEDULED: <2005-10-01 Sat +1m>',
+         "<2005-10-01 Sat +1m>",
+         "OrgDateScheduled((2005, 10, 1), None, True, ('+', 1, 'm'))"),
+        ('* TODO Pay the rent\nSCHEDULED: <2005-10-01 Sat +1m -3d>',
+         "<2005-10-01 Sat +1m -3d>",
+         "OrgDateScheduled((2005, 10, 1), None, True, ('+', 1, 'm'), ('-', 3, 'd'))"),
+        ('* TODO Pay the rent\nSCHEDULED: <2005-10-01 Sat -3d>',
+         "<2005-10-01 Sat -3d>",
+         "OrgDateScheduled((2005, 10, 1), None, True, None, ('-', 3, 'd'))"),
+        ('* TODO Pay the rent\nSCHEDULED: <2005-10-01 Sat ++1m>',
+         "<2005-10-01 Sat ++1m>",
+         "OrgDateScheduled((2005, 10, 1), None, True, ('++', 1, 'm'))"),
+        ('* TODO Pay the rent\nSCHEDULED: <2005-10-01 Sat .+1m>',
+         "<2005-10-01 Sat .+1m>",
+         "OrgDateScheduled((2005, 10, 1), None, True, ('.+', 1, 'm'))"),
+    ]  # fmt: skip
+    for inp, expected_str, expected_repr in testcases2:
+        root = loads(inp)
+        output = root[1].scheduled
+        assert str(output) == expected_str
+        assert repr(output) == expected_repr
diff --git a/src/orgparse/tests/test_rich.py b/src/orgparse/tests/test_rich.py
new file mode 100644
index 0000000..5171bb0
--- /dev/null
+++ b/src/orgparse/tests/test_rich.py
@@ -0,0 +1,89 @@
+'''
+Tests for rich formatting: tables etc.
+'''
+
+import pytest
+
+from .. import loads
+from ..extra import Table
+
+
+def test_table() -> None:
+    root = loads('''
+|       |           |     |
+|       | "heading" |     |
+|       |           |     |
+|-------+-----------+-----|
+| reiwf | fef       |     |
+|-------+-----------+-----|
+|-------+-----------+-----|
+| aba   | caba      | 123 |
+| yeah  |           | X   |
+
+  |------------------------+-------|
+  | when                   | count |
+  | datetime               | int   |
+  |------------------------+-------|
+  |                        | -1    |
+  | [2020-11-05 Thu 23:44] |       |
+  | [2020-11-06 Fri 01:00] | 1     |
+  |------------------------+-------|
+
+some irrelevant text
+
+| simple |
+|--------|
+| value1 |
+| value2 |
+    ''')
+
+    # destructuring checks the shape of body_rich; the tables are then
+    # rebuilt directly from the raw lines
+    [_gap1, t1, _gap2, t2, _gap3, t3, _gap4] = root.body_rich
+
+    t1 = Table(root._lines[1:10])
+    t2 = Table(root._lines[11:19])
+    t3 = Table(root._lines[22:26])
+
+    assert ilen(t1.blocks) == 4
+    assert list(t1.blocks)[2] == []
+    assert ilen(t1.rows) == 6
+
+    with pytest.raises(RuntimeError):
+        list(t1.as_dicts)  # not sure what it should be
+
+    assert ilen(t2.blocks) == 2
+    assert ilen(t2.rows) == 5
+    assert list(t2.rows)[3] == ['[2020-11-05 Thu 23:44]', '']
+
+    assert ilen(t3.blocks) == 2
+    assert list(t3.rows) == [['simple'], ['value1'], ['value2']]
+    assert t3.as_dicts.columns == ['simple']
+    assert list(t3.as_dicts) == [{'simple': 'value1'}, {'simple': 'value2'}]
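+
+
+# Table semantics leaned on above, summarised (inferred from the assertions,
+# not from Table's docs): |---| separator lines split a table into blocks,
+# .rows yields the non-separator rows, and .as_dicts appears to treat the
+# first block as a single header row, raising RuntimeError when that block
+# has more than one row (t1 above).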
+
+
+def test_table_2() -> None:
+    root = loads('''
+* item
+
+#+tblname: something
+| date                 | value | comment                |
+|----------------------+-------+------------------------|
+| 14.04.17             | 11    | aaaa                   |
+| May 26 2017 08:00    | 12    | what + about + pluses? |
+| May 26 09:00 - 10:00 | 13    | time is                |
+
+  some comment
+
+#+BEGIN_SRC python :var fname="plot.png" :var table=something :results file
+fig.savefig(fname)
+return fname
+#+END_SRC
+
+#+RESULTS:
+[[file:plot.png]]
+''')
+    [_, t, _] = root.children[0].body_rich
+    assert ilen(t.as_dicts) == 3
+
+
+def ilen(x) -> int:
+    return len(list(x))
diff --git a/tox.ini b/tox.ini
index 212aac4..a31cbab 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,7 +1,67 @@
 [tox]
-envlist = py26, py27, py32
+minversion = 3.21
+# relies on the correct version of Python being installed
+envlist = ruff,tests,mypy,ty
+# https://github.com/tox-dev/tox/issues/20#issuecomment-247788333
+# hack to prevent .tox from crapping into the project directory
+toxworkdir = {env:TOXWORKDIR_BASE:}{toxinidir}/.tox
+
 [testenv]
-deps =
-    nose
-commands = nosetests --with-doctest orgparse
-changedir = {envtmpdir}
+# TODO how to get package name from setuptools?
+package_name = "orgparse"
+pass_env =
+# useful for tests to know they are running under ci
+    CI
+    CI_*
+# respect user's cache dirs to prevent tox from crapping into project dir
+    PYTHONPYCACHEPREFIX
+    MYPY_CACHE_DIR
+    RUFF_CACHE_DIR
+
+set_env =
+# do not add current working directory to pythonpath
+# generally this is more robust and safer, prevents weird issues later on
+    PYTHONSAFEPATH=1
+
+# default is 'editable', in which tox builds a wheel first for some reason? not sure it makes much sense
+package = uv-editable
+
+
+[testenv:ruff]
+skip_install = true
+dependency_groups = testing
+commands =
+    {envpython} -m ruff check \
+        {posargs}
+
+
+[testenv:tests]
+dependency_groups = testing
+commands =
+    # posargs allow test filtering, e.g. tox ... -- -k test_name
+    {envpython} -m pytest \
+        --pyargs {[testenv]package_name} \
+        {posargs}
+
+
+[testenv:mypy]
+dependency_groups = testing
+commands =
+    {envpython} -m mypy --no-install-types \
+        -p {[testenv]package_name} \
+        --txt-report  .coverage.mypy \
+        --html-report .coverage.mypy \
+        # this is for github actions to upload to codecov.io
+        # sadly xml coverage crashes on windows... so we need to disable it
+        {env:CI_MYPY_COVERAGE} \
+        {posargs}
+
+
+[testenv:ty]
+dependency_groups = testing
+extras = optional
+deps =  # any other dependencies (if needed)
+commands =
+    {envpython} -m ty \
+        check \
+        {posargs}
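+
+# Usage sketch (hypothetical invocations; assumes tox with tox-uv available):
+#   tox run-parallel --parallel-live     # all envs in parallel
+#   tox run -e tests -- -k test_name     # one env, filtering tests via posargs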